diff --git a/integrationtests/test_issues.py b/integrationtests/test_issues.py
index 814e82ad75512ec8fe217294e1a9e86c6aa01ab3..76392f3a4ce20d7ed6b6ccc30c79f1ce400001f7 100644
--- a/integrationtests/test_issues.py
+++ b/integrationtests/test_issues.py
@@ -16,20 +16,18 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #
-from pytest import fixture, mark, raises
-
 import linkahead as db
-from linkahead.cached import cache_clear
 from caosadvancedtools.models.parser import parse_model_from_string
-
 from caoscrawler.crawl import Crawler
 from caoscrawler.identifiable import Identifiable
 from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
+from caoscrawler.scanner import (create_converter_registry,
+                                 scan_structure_elements)
 from caoscrawler.structure_elements import DictElement
-
-from caoscrawler.scanner import create_converter_registry, scan_structure_elements
-
+from linkahead.cached import cache_clear
 from linkahead.utils.register_tests import clear_database, set_test_key
+from pytest import fixture, mark, raises
+
 set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
 
 
@@ -171,8 +169,9 @@ def test_issue_83(clear_database):
         name=referencing_type.name).add_property(name=referenced_type.name, value=[ref_target1])
     referencing2 = db.Record(name="Referencing2").add_parent(
         name=referencing_type.name).add_property(name=referenced_type.name, value=[ref_target2])
-    referencing3 = db.Record(name="Referencing3").add_parent(name=referencing_type.name).add_property(
-        name=referenced_type.name, value=[ref_target1, ref_target2])
+    referencing3 = db.Record(name="Referencing3").add_parent(
+        name=referencing_type.name).add_property(name=referenced_type.name, value=[ref_target1,
+                                                                                   ref_target2])
 
     records = db.Container().extend(
         [ref_target1, ref_target2, referencing1, referencing2, referencing3])
diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py
index 1a7f4d56da65c2aa3c436435f3734754ea037cc0..6280f16cc58c96145bcafca12437d18b0a4b63ba 100644
--- a/src/caoscrawler/converters.py
+++ b/src/caoscrawler/converters.py
@@ -34,7 +34,7 @@ import warnings
 from abc import ABCMeta, abstractmethod
 from inspect import signature
 from string import Template
-from typing import Any, List, Optional, Tuple, Union
+from typing import Any, Optional, Union
 
 import linkahead as db
 import pandas as pd
@@ -138,8 +138,8 @@ def replace_variables(propvalue: Any, values: GeneralStore):
     This function replaces variables in property values (and possibly other locations,
     where the crawler can replace cfood-internal variables).
 
-    If `propvalue` is a single variable name preceeded with a '$' (e.g. '$var' or '${var}'), then
-    the corresponding value stored in `values` is returned.
+    If ``propvalue`` is a single variable name preceded by a ``$`` (e.g. ``$var`` or ``${var}``),
+    then the corresponding value stored in ``values`` is returned.
     In any other case the variable substitution is carried out as defined by string templates
     and a new string with the replaced variables is returned.
     """
@@ -164,16 +164,16 @@ def handle_value(value: Union[dict, str, list], values: GeneralStore):
     add as an additional property (multiproperty).
 
     Variable names (starting with a "$") are replaced by the corresponding value stored in the
-    `values` GeneralStore.
+    ``values`` GeneralStore.
 
 Parameters
 ----------
 
-value:
-  - if str, the value to be interpreted. E.g. "4", "hallo" or "$a" etc.
-  - if dict, must have keys "value" and "collection_mode". The returned tuple is directly
+value: Union[dict, str, list]
+  - If *str*, the value to be interpreted. E.g. "4", "hello" or "$a" etc.
+  - If *dict*, must have keys ``value`` and ``collection_mode``. The returned tuple is directly
     created from the corresponding values.
-  - if list, each element is checked for replacement and the resulting list will be used
+  - If *list*, each element is checked for replacement and the resulting list will be used
     as (list) value for the property
 
 Returns
@@ -185,7 +185,7 @@ out: tuple
     """
     # @review Florian Spreckelsen 2022-05-13
 
-    if type(value) == dict:
+    if isinstance(value, dict):
         if "value" not in value:
             # TODO: how do we handle this case? Just ignore?
             #       or disallow?
@@ -193,7 +193,7 @@ out: tuple
         propvalue = value["value"]
         # can be "single", "list" or "multiproperty"
         collection_mode = value["collection_mode"]
-    elif type(value) == str:
+    elif isinstance(value, str):
         propvalue = value
         collection_mode = "single"
         if propvalue.startswith("+"):
@@ -202,7 +202,7 @@ out: tuple
         elif propvalue.startswith("*"):
             collection_mode = "multiproperty"
             propvalue = propvalue[1:]
-    elif type(value) == list:
+    elif isinstance(value, list):
         # TODO: (for review)
         #       This is a bit dirty right now and needed for
         #       being able to directly set list values. Semantics is, however, a bit
@@ -213,7 +213,7 @@ out: tuple
         propvalue = list()
         for element in value:
             # Do the element-wise replacement only, when its type is string:
-            if type(element) == str:
+            if isinstance(element, str):
                 propvalue.append(replace_variables(element, values))
             else:
                 propvalue.append(element)
@@ -326,10 +326,12 @@ class Converter(object, metaclass=ABCMeta):
 
         Parameters
         ----------
-        definition: dict, Please refer to ``src/doc/converters.rst`` to learn about the structure
-                    that the definition dict must have.
-        converter_registry: dict, A dictionary that contains converter names as keys and dicts as
-                            values. Those value dicts have the keys 'converter' and 'package'.
+        definition: dict
+          Please refer to ``src/doc/converters.rst`` to learn about the structure that the
+          definition dict must have.
+        converter_registry: dict
+          A dictionary that contains converter names as keys and dicts as values. Those value dicts
+          have the keys 'converter' and 'package'.
         """
 
         self.definition = definition
@@ -428,7 +430,7 @@ class Converter(object, metaclass=ABCMeta):
                 pass
         """
 
-        if not "transform" in self.definition:
+        if "transform" not in self.definition:
             return
         for transformer_key, transformer in self.definition["transform"].items():
             in_value = replace_variables(transformer["in"], values)
@@ -464,13 +466,13 @@ class Converter(object, metaclass=ABCMeta):
             values[match.group('varname')] = out_value
 
     @abstractmethod
-    def create_children(self, values: GeneralStore,
-                        element: StructureElement):
+    def create_children(self, values: GeneralStore, element: StructureElement):
         pass
 
     def create_records(self, values: GeneralStore, records: RecordStore,
                        element: StructureElement):
         # TODO why is element passed but not used???
+        # ANSWER: because it might be used by overriding child classes.
 
         if "records" not in self.definition:
             return []
@@ -481,7 +483,7 @@ class Converter(object, metaclass=ABCMeta):
                               self.definition["records"])
 
     def filter_children(self, children_with_strings:
-                        List[Tuple[StructureElement, str]], expr: str,
+                        list[tuple[StructureElement, str]], expr: str,
                         group: str, rule: str):
         """Filter children according to regexp `expr` and `rule`."""
 
@@ -624,7 +626,7 @@ class DirectoryConverter(Converter):
 
         element: A directory (of type Directory) which will be traversed.
         """
-        children: List[StructureElement] = []
+        children: list[StructureElement] = []
 
         for name in sorted(os.listdir(element.path)):
             path = os.path.join(element.path, name)
@@ -664,7 +666,7 @@ class SimpleFileConverter(Converter):
 class FileConverter(SimpleFileConverter):
     def __init__(self, *args, **kwargs):
         warnings.warn(DeprecationWarning(
-            "This class is depricated. Please use SimpleFileConverter."))
+            "This class is deprecated. Please use SimpleFileConverter."))
         super().__init__(*args, **kwargs)
 
 
@@ -697,12 +699,12 @@ class MarkdownFileConverter(SimpleFileConverter):
                 "Error during the validation (yaml header cannot be read) of the markdown file "
                 "located at the following node in the data structure:\n"
                 "{}\nError:\n{}".format(path, err))
-        children: List[StructureElement] = []
+        children: list[StructureElement] = []
 
         for name, entry in header.items():
-            if type(entry) == list:
+            if isinstance(entry, list):
                 children.append(ListElement(name, entry))
-            elif type(entry) == str:
+            elif isinstance(entry, str):
                 children.append(TextElement(name, entry))
             else:
                 if generalStore is not None and self.name in generalStore:
@@ -717,7 +719,9 @@ class MarkdownFileConverter(SimpleFileConverter):
 def convert_basic_element(element: Union[list, dict, bool, int, float, str, None], name=None,
                           msg_prefix=""):
     """Convert basic Python objects to the corresponding StructureElements"""
-    if isinstance(element, list):
+    if isinstance(element, StructureElement):
+        return element
+    elif isinstance(element, list):
         return ListElement(name, element)
     elif isinstance(element, dict):
         return DictElement(name, element)
@@ -818,14 +822,14 @@ class DictElementConverter(Converter):
 class DictConverter(DictElementConverter):
     def __init__(self, *args, **kwargs):
         warnings.warn(DeprecationWarning(
-            "This class is depricated. Please use DictConverter."))
+            "This class is deprecated. Please use DictConverter."))
         super().__init__(*args, **kwargs)
 
 
 class DictDictElementConverter(DictElementConverter):
     def __init__(self, *args, **kwargs):
         warnings.warn(DeprecationWarning(
-            "This class is depricated. Please use DictElementConverter."))
+            "This class is deprecated. Please use DictElementConverter."))
         super().__init__(*args, **kwargs)
 
 
@@ -1013,7 +1017,7 @@ class BooleanElementConverter(_AbstractScalarValueElementConverter):
 class DictBooleanElementConverter(BooleanElementConverter):
     def __init__(self, *args, **kwargs):
         warnings.warn(DeprecationWarning(
-            "This class is depricated. Please use BooleanElementConverter."))
+            "This class is deprecated. Please use BooleanElementConverter."))
         super().__init__(*args, **kwargs)
 
 
@@ -1029,7 +1033,7 @@ class FloatElementConverter(_AbstractScalarValueElementConverter):
 class DictFloatElementConverter(FloatElementConverter):
     def __init__(self, *args, **kwargs):
         warnings.warn(DeprecationWarning(
-            "This class is depricated. Please use FloatElementConverter."))
+            "This class is deprecated. Please use FloatElementConverter."))
         super().__init__(*args, **kwargs)
 
 
@@ -1054,7 +1058,7 @@ the 'match_value' key to match the value of the TextElement and 'match_name' for
 class DictTextElementConverter(TextElementConverter):
     def __init__(self, *args, **kwargs):
         warnings.warn(DeprecationWarning(
-            "This class is depricated. Please use TextElementConverter."))
+            "This class is deprecated. Please use TextElementConverter."))
         super().__init__(*args, **kwargs)
 
 
@@ -1070,7 +1074,7 @@ class IntegerElementConverter(_AbstractScalarValueElementConverter):
 class DictIntegerElementConverter(IntegerElementConverter):
     def __init__(self, *args, **kwargs):
         warnings.warn(DeprecationWarning(
-            "This class is depricated. Please use IntegerElementConverter."))
+            "This class is deprecated. Please use IntegerElementConverter."))
         super().__init__(*args, **kwargs)
 
 
@@ -1080,7 +1084,7 @@ class ListElementConverter(Converter):
         # TODO: See comment on types and inheritance
         if not isinstance(element, ListElement):
             raise RuntimeError(
-                "This converter can only process DictListElements.")
+                "This converter can only process ListElements.")
         children: list[StructureElement] = []
         for index, list_element in enumerate(element.value):
             children.append(
@@ -1112,7 +1116,7 @@ class ListElementConverter(Converter):
 class DictListElementConverter(ListElementConverter):
     def __init__(self, *args, **kwargs):
         warnings.warn(DeprecationWarning(
-            "This class is depricated. Please use ListElementConverter."))
+            "This class is deprecated. Please use ListElementConverter."))
         super().__init__(*args, **kwargs)
 
 
diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index c926df4443fa461505d428e1738db43f8683def2..928bf472e97daa09bfdf6fd742b981d981ab9204 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -51,26 +51,24 @@ from caosadvancedtools.cache import UpdateCache
 from caosadvancedtools.crawler import Crawler as OldCrawler
 from caosadvancedtools.serverside.helper import send_mail
 from caosadvancedtools.utils import create_entity_link
-from linkahead.apiutils import (EntityMergeConflictError, compare_entities,
+from linkahead.apiutils import (compare_entities,
                                 merge_entities)
 from linkahead.cached import cache_clear, cached_get_entity_by
 from linkahead.common.datatype import get_list_datatype, is_reference
-from linkahead.exceptions import EmptyUniqueQueryError
 from linkahead.utils.escape import escape_squoted_text
 
 from .config import get_config_setting
 from .converters import Converter, ConverterValidationError
 from .debug_tree import DebugTree
-from .identifiable import Identifiable
 from .identifiable_adapters import (CaosDBIdentifiableAdapter,
-                                    IdentifiableAdapter,
-                                    LocalStorageIdentifiableAdapter)
+                                    IdentifiableAdapter)
 from .logging import configure_server_side_logging
 from .macros import defmacro_constructor, macro_constructor
 from .scanner import (create_converter_registry, initialize_converters,
                       load_definition, scan_directory, scan_structure_elements)
 from .stores import GeneralStore
 from .structure_elements import StructureElement
+from .sync_graph import SyncGraph
 
 logger = logging.getLogger(__name__)
 
@@ -172,163 +170,12 @@ def _resolve_datatype(prop: db.Property, remote_entity: db.Entity):
     return prop
 
 
-def _treat_merge_error_of(newrecord, record):
-    """
-    The parameters are two entities that cannot be merged with the merge_entities function.
-
-    # This function checks for two obvious cases where no merge will ever be possible:
-    # 1. Two Entities with differing IDs
-    # 2. Two non-Entity values which differ
-
-    It creates a more informative logger message and raises an Exception in those cases.
-    """
-    for this_p in newrecord.properties:
-        that_p = record.get_property(this_p.name)
-
-        if that_p is None:
-            logger.debug(f"Property {this_p.name} does not exist in the second entity. Note that "
-                         "this should not be the reason for the merge conflict.")
-            continue
-
-        if (isinstance(this_p.value, db.Entity)
-                and isinstance(that_p.value, db.Entity)):
-            if this_p.value.id is not None and that_p.value.id is not None:
-                if this_p.value.id != that_p.value.id:
-                    logger.error("The Crawler is trying to merge two entities "
-                                 "because they should be the same object (same"
-                                 " identifiables), but they reference "
-                                 "different Entities with the same Property."
-                                 f"Problematic Property: {this_p.name}\n"
-                                 f"Referenced Entities: {this_p.value.id} and "
-                                 f"{that_p.value.id}\n"
-                                 f"{record}\n{newrecord}")
-                    raise RuntimeError("Cannot merge Entities")
-        elif (not isinstance(this_p.value, db.Entity)
-              and not isinstance(that_p.value, db.Entity)):
-            if ((this_p.value != that_p.value)
-                # TODO can we also compare lists?
-                and not isinstance(this_p.value, list)
-                    and not isinstance(that_p.value, list)):
-                logger.error(
-                    "The Crawler is trying to merge two entities because they should be the same "
-                    "object (same identifiables), but they have different values for the same "
-                    "Property.\n"
-                    f"Problematic Property: {this_p.name}\n"
-                    f"Values: {this_p.value} and {that_p.value}\n"
-                    f"{record}\n{newrecord}")
-                raise RuntimeError("Cannot merge Entities")
-
-
 class SecurityMode(Enum):
     RETRIEVE = 0
     INSERT = 1
     UPDATE = 2
 
 
-class TreatedRecordLookUp():
-    """tracks Records and Identifiables for which it was checked whether they exist in the remote
-    server
-
-    For a given Record it can be checked, whether it exists in the remote sever if
-    - it has a (valid) ID
-    - it has a (valid) path (FILEs only)
-    - an identifiable can be created for the Record.
-
-    Records are added by calling the `add` function and they are then added to the internal
-    existing or missing list depending on whether the Record has a valid ID.
-    Additionally, the Record is added to three look up dicts. The keys of those are paths, IDs and
-    the representation of the identifiables.
-
-    The extreme case, that one could imagine, would be that the same Record occurs three times as
-    different Python objects: one that only has an ID, one with only a path and one without ID and
-    path but with identifying properties. During `split_into_inserts_and_updates` all three
-    must be identified with each other (and must be merged). Since we require, that treated
-    entities have a valid ID if they exist in the remote server, all three objects would be
-    identified with each other simply using the IDs.
-
-    In the case that the Record is not yet in the remote server, there cannot be a Python object
-    with an ID. Thus we might have one with a path and one with an identifiable. If that Record
-    does not yet exist, it is necessary that both Python objects have at least either the path or
-    the identifiable in common.
-    """
-
-    def __init__(self):
-        self._id_look_up: dict[int, db.Entity] = {}
-        self._path_look_up: dict[str, db.Entity] = {}
-        self._identifiable_look_up: dict[str, db.Entity] = {}
-        self.remote_missing_counter = -1
-        self._missing: dict[int, db.Entity] = {}
-        self._existing: dict[int, db.Entity] = {}
-
-    def add(self, record: db.Entity, identifiable: Optional[Identifiable] = None):
-        """
-        Add a Record that was treated, such that it is contained in the internal look up dicts
-
-        This Record MUST have an ID if it was found in the remote server.
-        """
-        if record.id is None:
-            if record.path is None and identifiable is None:
-                raise RuntimeError("Record must have ID or path or an identifiable must be given."
-                                   f"Record is\n{record}")
-            record.id = self.remote_missing_counter
-            self.remote_missing_counter -= 1
-            self._add_any(record, self._missing, identifiable)
-        else:
-            self._add_any(record, self._existing, identifiable)
-
-    def get_any(self, record: db.Entity, identifiable: Optional[Identifiable] = None):
-        """
-        Check whether this Record was already added. Identity is based on ID, path or Identifiable
-        represenation
-        """
-        if record.id is not None and record.id in self._id_look_up:
-            return self._id_look_up[record.id]
-        if record.path is not None and record.path in self._path_look_up:
-            return self._path_look_up[record.path]
-        if (identifiable is not None and identifiable.get_representation() in
-                self._identifiable_look_up):
-            return self._identifiable_look_up[identifiable.get_representation()]
-
-    def get_existing(self, record: db.Entity, identifiable: Optional[Identifiable] = None):
-        """ Check whether this Record exists on the remote server
-
-        Returns: The stored Record
-        """
-        rec = self.get_any(record, identifiable)
-        if id(rec) in self._existing:
-            return rec
-        else:
-            return None
-
-    def get_missing(self, record: db.Entity, identifiable: Optional[Identifiable] = None):
-        """ Check whether this Record is missing on the remote server
-
-        Returns: The stored Record
-        """
-        rec = self.get_any(record, identifiable)
-        if id(rec) in self._missing:
-            return rec
-        else:
-            return None
-
-    def get_missing_list(self):
-        """ Return all Records that are missing in the remote server """
-        return list(self._missing.values())
-
-    def get_existing_list(self):
-        """ Return all Records that exist in the remote server """
-        return list(self._existing.values())
-
-    def _add_any(self, record: db.Entity, lookup, identifiable: Optional[Identifiable] = None):
-        if record.id is not None:
-            self._id_look_up[record.id] = record
-        if record.path is not None:
-            self._path_look_up[record.path] = record
-        if identifiable is not None:
-            self._identifiable_look_up[identifiable.get_representation()] = record
-        lookup[id(record)] = record
-
-
 class Crawler(object):
     """
     Crawler class that encapsulates crawling functions.
@@ -365,14 +212,13 @@ class Crawler(object):
         # The following caches store records, where we checked whether they exist on the remote
         # server. Since, it is important to know whether they exist or not, we store them into two
         # different caches.
-        self.treated_records_lookup = TreatedRecordLookUp()
 
         # TODO does it make sense to have this as member variable?
         self.securityMode = securityMode
         # TODO does it make sense to have this as member variable(run_id)?
         self.generate_run_id()
 
-        self.identifiableAdapter: IdentifiableAdapter = LocalStorageIdentifiableAdapter()
+        self.identifiableAdapter: IdentifiableAdapter = CaosDBIdentifiableAdapter()
         if identifiableAdapter is not None:
             self.identifiableAdapter = identifiableAdapter
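+        # Typical setup sketch; ``load_from_yaml_definition`` is assumed from
+        # the identifiable_adapters module:
+        #     ident = CaosDBIdentifiableAdapter()
+        #     ident.load_from_yaml_definition("identifiables.yml")
+        #     crawler = Crawler(identifiableAdapter=ident)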
 
@@ -449,396 +295,51 @@ class Crawler(object):
         self.crawled_data = data
         return data
 
-    def _has_reference_value_without_id(self, ident: Identifiable) -> bool:
-        """
-        Returns True if there is at least one value in the properties and backrefs attributes of
-        ``ident`` which:
-
-        a) is a reference property AND
-        b) where the value is set to a
-           :external+caosdb-pylib:py:class:`db.Entity <caosdb.common.models.Entity>`
-           (instead of an ID) AND
-        c) where the ID of the value (the
-           :external+caosdb-pylib:py:class:`db.Entity <caosdb.common.models.Entity>` object in b))
-           is not set (to an integer)
-
-        Returns
-        -------
-        bool
-            True if there is a value without id (see above)
-
-        Raises
-        ------
-        ValueError
-            If no Identifiable is given.
-        """
-        if ident is None:
-            raise ValueError("Identifiable has to be given as argument")
-        for pvalue in list(ident.properties.values()) + ident.backrefs:
-            if isinstance(pvalue, list):
-                for el in pvalue:
-                    if isinstance(el, db.Entity) and el.id is None:
-                        return True
-            elif isinstance(pvalue, db.Entity) and pvalue.id is None:
-                return True
-        return False
-
-    @staticmethod
-    def create_flat_list(ent_list: list[db.Entity], flat: Optional[list[db.Entity]] = None):
-        """
-        Recursively adds entities and all their properties contained in ent_list to
-        the output list flat.
-
-        TODO: This function will be moved to pylib as it is also needed by the
-              high level API.
-        """
-        # Note: A set would be useful here, but we do not want a random order.
-        if flat is None:
-            flat = list()
-        for el in ent_list:
-            if el not in flat:
-                flat.append(el)
-        for ent in ent_list:
-            for p in ent.properties:
-                # For lists append each element that is of type Entity to flat:
-                if isinstance(p.value, list):
-                    for el in p.value:
-                        if isinstance(el, db.Entity):
-                            if el not in flat:
-                                flat.append(el)
-                                Crawler.create_flat_list([el], flat)
-                elif isinstance(p.value, db.Entity):
-                    if p.value not in flat:
-                        flat.append(p.value)
-                        Crawler.create_flat_list([p.value], flat)
-        return flat
-
-    def _has_missing_object_in_references(self, ident: Identifiable, referencing_entities: dict):
-        """
-        returns False if any value in the properties attribute is a db.Entity object that
-        is contained in the `remote_missing_cache`. If ident has such an object in
-        properties, it means that it references another Entity, where we checked
-        whether it exists remotely and it was not found.
-        """
-        if ident is None:
-            raise ValueError("Identifiable has to be given as argument")
-        for pvalue in list(ident.properties.values()) + ident.backrefs:
-            # Entity instead of ID and not cached locally
-            if (isinstance(pvalue, list)):
-                for el in pvalue:
-                    elident = self.identifiableAdapter.get_identifiable(
-                        el, referencing_entities[id(el)])
-                    if (isinstance(el, db.Entity)
-                            and self.treated_records_lookup.get_missing(el, elident) is not None):
-                        return True
-            if (isinstance(pvalue, db.Entity) and self.treated_records_lookup.get_missing(
-                pvalue,
-                    self.identifiableAdapter.get_identifiable(pvalue,
-                                                              referencing_entities[id(pvalue)])
-            ) is not None):
-                # might be checked when reference is resolved
-                return True
-        return False
-
-    def replace_references_with_cached(self, record: db.Record, referencing_entities: dict):
-        """
-        Replace all references with the versions stored in the cache.
-
-        If the cache version is not identical, raise an error.
-        """
-        for p in record.properties:
-            if (isinstance(p.value, list)):
-                lst = []
-                for el in p.value:
-                    if (isinstance(el, db.Entity) and el.id is None):
-                        cached = self.treated_records_lookup.get_any(
-                            el,
-                            self.identifiableAdapter.get_identifiable(
-                                el, referencing_entities[id(el)]))
-                        if cached is None:
-                            lst.append(el)
-                            continue
-                        if not check_identical(cached, el, True):
-                            if isinstance(p.value, db.File):
-                                if p.value.path != cached.path:
-                                    raise RuntimeError(
-                                        "The cached and the referenced entity are not identical.\n"
-                                        f"Cached:\n{cached}\nReferenced:\n{el}"
-                                    )
-                            else:
-                                raise RuntimeError(
-                                    "The cached and the referenced entity are not identical.\n"
-                                    f"Cached:\n{cached}\nReferenced:\n{el}"
-                                )
-                        lst.append(cached)
-                    else:
-                        lst.append(el)
-                p.value = lst
-            if (isinstance(p.value, db.Entity) and p.value.id is None):
-                cached = self.treated_records_lookup.get_any(
-                    p.value, self.identifiableAdapter.get_identifiable(
-                        p.value, referencing_entities[id(p.value)]))
-                if cached is None:
-                    continue
-                if not check_identical(cached, p.value, True):
-                    if isinstance(p.value, db.File):
-                        if p.value.path != cached.path:
-                            raise RuntimeError(
-                                "The cached and the referenced entity are not identical.\n"
-                                f"Cached:\n{cached}\nReferenced:\n{p.value}"
-                            )
-                    else:
-                        raise RuntimeError(
-                            "The cached and the referenced entity are not identical.\n"
-                            f"Cached:\n{cached}\nReferenced:\n{p.value}"
-                        )
-                p.value = cached
-
-    @staticmethod
-    def bend_references_to_new_object(old, new, entities):
-        """ Bend references to the other object
-        Iterate over all entities in `entities` and check the values of all properties of
-        occurances of old Entity and replace them with new Entity
-        """
-        for el in entities:
-            for p in el.properties:
-                if isinstance(p.value, list):
-                    for index, val in enumerate(p.value):
-                        if val is old:
-                            p.value[index] = new
-                else:
-                    if p.value is old:
-                        p.value = new
-
-    def _merge_identified(self, newrecord, record, try_to_merge_later, all_records):
-        """ tries to merge record into newrecord
-
-        If it fails, record is added to the try_to_merge_later list.
-        In any case, references are bent to the newrecord object.
-
-        """
-        try:
-            merge_entities(
-                newrecord, record, merge_references_with_empty_diffs=False,
-                merge_id_with_resolved_entity=True)
-        except EntityMergeConflictError:
-            _treat_merge_error_of(newrecord, record)
-            # We cannot merge but it is none of the clear case where merge is
-            # impossible. Thus we try later
-            try_to_merge_later.append(record)
-            if newrecord.id is not None:
-                record.id = newrecord.id
-        except NotImplementedError:
-            print(newrecord)
-            print(record)
-            raise
-        Crawler.bend_references_to_new_object(
-            old=record, new=newrecord,
-            entities=all_records
-        )
-
-    def _identity_relies_on_unchecked_entities(self, record: db.Record, referencing_entities):
-        """
-        If a record for which it could not yet be verified whether it exists in LA or not is part
-        of the identifying properties, this returns True, otherwise False
-        """
-
-        registered_identifiable = self.identifiableAdapter.get_registered_identifiable(record)
-        if registered_identifiable is None:
-            return False
-        refs = self.identifiableAdapter.get_identifying_referencing_entities(referencing_entities,
-                                                                             registered_identifiable)
-        if any(el is None for el in refs):
-            return True
-
-        refs = self.identifiableAdapter.get_identifying_referenced_entities(
-            record, registered_identifiable)
-        if any([self.treated_records_lookup.get_any(el) is None for el in refs]):
-            return True
-
-        return False
-
-    @staticmethod
-    def create_reference_mapping(flat: list[db.Entity]):
-        """
-        Create a dictionary of dictionaries of the form:
-        dict[int, dict[str, list[Union[int,None]]]]
-
-        - The integer index is the Python id of the value object.
-        - The string is the name of the first parent of the referencing object.
-
-        Each value objects is taken from the values of all properties from the list flat.
-
-        So the returned mapping maps ids of entities to the ids of objects which are referring
-        to them.
-        """
-        # TODO we need to treat children of RecordTypes somehow.
-        references: dict[int, dict[str, list[Union[int, None]]]] = {}
-        for ent in flat:
-            if id(ent) not in references:
-                references[id(ent)] = {}
-            for p in ent.properties:
-                val = p.value
-                if not isinstance(val, list):
-                    val = [val]
-                for v in val:
-                    if isinstance(v, db.Entity):
-                        if id(v) not in references:
-                            references[id(v)] = {}
-                        if ent.parents[0].name not in references[id(v)]:
-                            references[id(v)][ent.parents[0].name] = []
-                        references[id(v)][ent.parents[0].name].append(ent.id)
-
-        return references
-
-    def split_into_inserts_and_updates(self, ent_list: list[db.Entity]):
-        flat = Crawler.create_flat_list(ent_list)
-        all_records = list(flat)
-
-        # TODO: can the following be removed at some point
-        for ent in flat:
-            if ent.role == "Record" and len(ent.parents) == 0:
-                raise RuntimeError(f"Records must have a parent.\n{ent}")
-
-        try_to_merge_later = []
-
-        # Check whether Records can be identified without identifiable
-        for i in reversed(range(len(flat))):
-            record = flat[i]
-            # 1. Can it be identified via an ID?
-            if record.id is not None:
-                treated_record = self.treated_records_lookup.get_existing(record)
-                if treated_record is not None:
-                    self._merge_identified(treated_record, record, try_to_merge_later, all_records)
-                    all_records.remove(record)
-                    referencing_entities = self.create_reference_mapping(all_records)
-                else:
-                    self.treated_records_lookup.add(record, None)
-                assert record.id
-                del flat[i]
-            # 2. Can it be identified via a path?
-            elif record.path is not None:
-                try:
-                    existing = cached_get_entity_by(path=record.path)
-                except EmptyUniqueQueryError:
-                    existing = None
-                if existing is not None:
-                    record.id = existing.id
-                    # TODO check the following copying of _size and _checksum
-                    # Copy over checksum and size too if it is a file
-                    record._size = existing._size
-                    record._checksum = existing._checksum
-                treated_record = self.treated_records_lookup.get_any(record)
-                if treated_record is not None:
-                    self._merge_identified(treated_record, record, try_to_merge_later, all_records)
-                    all_records.remove(record)
-                    referencing_entities = self.create_reference_mapping(all_records)
-                else:
-                    # TODO add identifiable if possible
-                    self.treated_records_lookup.add(record, None)
-                assert record.id
-                del flat[i]
-
+    def split_into_inserts_and_updates(self, st: SyncGraph):
+        """ iteratively identifies nodes in the SyncGraph st and checks whether those exist on the
+        remote server such that in the end two list are being created that list entities that need
+        to be update or inserted"""
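+        # Usage sketch, mirroring the call in ``synchronize`` below:
+        #     st = SyncGraph(crawled_data, self.identifiableAdapter)
+        #     to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(st)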
         entity_was_treated = True
-        # flat contains Entities which could not yet be checked against the remote server
-        while entity_was_treated and len(flat) > 0:
+        # st.unchecked contains Entities which could not yet be checked against the remote server
+        while entity_was_treated and len(st.unchecked) > 0:
             entity_was_treated = False
-            referencing_entities = self.create_reference_mapping(all_records)
-
-            # For each element we try to find out whether we can find it in the server or whether
-            # it does not yet exist. Since a Record may reference other unkown Records it might not
-            # be possible to answer this right away.
-            # The following checks are done on each Record:
-            # 1. Is it in the cache of already checked Records?
-            # 2. Can it be checked on the remote server?
-            # 3. Does it have to be new since a needed reference is missing?
-            for i in reversed(range(len(flat))):
-                record = flat[i]
-
-                if self._identity_relies_on_unchecked_entities(record,
-                                                               referencing_entities[id(record)]):
+
+            for se in st.unchecked:
+                if se.identifiable is None:  # we cannot yet identify this node
                     continue
 
-                identifiable = self.identifiableAdapter.get_identifiable(
-                    record,
-                    referencing_entities=referencing_entities[id(record)])
-
-                # 1. Is it in the cache of already checked Records?
-                if self.treated_records_lookup.get_any(record, identifiable) is not None:
-                    treated_record = self.treated_records_lookup.get_any(record, identifiable)
-                    # Since the identifiables are the same, treated_record and record actually
-                    # describe the same object.
-                    # We merge record into treated_record in order to prevent loss of information
-                    self._merge_identified(treated_record, record, try_to_merge_later, all_records)
-                    all_records.remove(record)
-                    referencing_entities = self.create_reference_mapping(all_records)
-
-                    del flat[i]
-                    entity_was_treated = True
-
-                # 2. Can it be checked on the remote server?
-                elif not self._has_reference_value_without_id(identifiable):
-                    identified_record = (
-                        self.identifiableAdapter.retrieve_identified_record_for_identifiable(
-                            identifiable))
-                    if identified_record is None:
-                        # identifiable does not exist remotely -> record needs to be inserted
-                        self.treated_records_lookup.add(record, identifiable)
-                    else:
-                        # side effect
-                        record.id = identified_record.id
-                        record.path = identified_record.path
-                        self.treated_records_lookup.add(record, identifiable)
-                    assert record.id
-                    del flat[i]
-                    entity_was_treated = True
-
-                # 3. Does it have to be new since a needed reference is missing?
-                # (Is it impossible to check this record because an identifiable references a
-                # missing record?)
-                elif self._has_missing_object_in_references(identifiable, referencing_entities):
-                    self.treated_records_lookup.add(record, identifiable)
-                    assert record.id
-                    del flat[i]
-                    entity_was_treated = True
-
-            for record in flat:
-                self.replace_references_with_cached(record, referencing_entities)
-
-        # We postponed the merge for records where it failed previously and try it again now.
+                # check remote server
+                identified_record = (
+                    st.identifiableAdapter.retrieve_identified_record_for_identifiable(
+                        se.identifiable))
+                remote_id = None
+                if identified_record is not None:
+                    remote_id = identified_record.id
+                # Set the ID of the node. If the node is missing, remote_id is None and the
+                # SyncGraph marks it as missing.
+                st.set_id_of_node(se, remote_id)
+                entity_was_treated = True
+                break  # one or more nodes were just removed from st.unchecked -> back to start
+
         # This only might add properties of the postponed records to the already used ones.
-        for record in try_to_merge_later:
-            identifiable = self.identifiableAdapter.get_identifiable(
-                record,
-                referencing_entities=referencing_entities[id(record)])
-            newrecord = self.treated_records_lookup.get_any(record, identifiable)
-            merge_entities(newrecord, record, merge_id_with_resolved_entity=True)
-        if len(flat) > 0:
-            circle = self.detect_circular_dependency(flat)
-            if circle is None:
-                logger.error("Failed, but found NO circular dependency. The data is as follows:"
-                             + str(self.compact_entity_list_representation(flat,
-                                                                           referencing_entities)))
-            else:
-                logger.error("Found circular dependency (Note that this might include references "
-                             "that are not identifying properties): "
-                             + self.compact_entity_list_representation(circle,
-                                                                       referencing_entities))
+        if len(st.unchecked) > 0:
+            # circle = st.unchecked_contains_circular_dependency()
+            # if circle is None:
+            #    logger.error("Failed, but found NO circular dependency. The data is as follows:"
+            #                 + "\n".join([str(el) for el in st.unchecked])
+
+            #                 )
+            # else:
+            #    logger.error("Found circular dependency (Note that this might include references "
+            #                 "that are not identifying properties): "
+            #                 + "\n".join([str(el) for el in st.unchecked])
+            #                 )
 
             raise RuntimeError(
-                f"Could not finish split_into_inserts_and_updates. Circular dependency: "
-                f"{circle is not None}")
+                "Could not finish split_into_inserts_and_updates. "
+                "It might be due to a circular dependency")
 
-        # remove negative IDs
-        missing = self.treated_records_lookup.get_missing_list()
-        for el in missing:
-            if el.id is None:
-                raise RuntimeError("This should not happen")  # TODO remove
-            if el.id >= 0:
-                raise RuntimeError("This should not happen")  # TODO remove
-            el.id = None
-
-        return (missing, self.treated_records_lookup.get_existing_list())
+        return st.export_record_lists()
 
     def replace_entities_with_ids(self, rec: db.Record):
         for el in rec.properties:
@@ -851,7 +352,7 @@ class Crawler(object):
                         if val.id is not None:
                             el.value[index] = val.id
 
-    @ staticmethod
+    @staticmethod
     def compact_entity_list_representation(entities, referencing_entities: List) -> str:
         """ a more readable representation than the standard xml representation
 
@@ -883,40 +384,7 @@ class Crawler(object):
 
         return text + "--------\n"
 
-    @ staticmethod
-    def detect_circular_dependency(flat: list[db.Entity]):
-        """
-        Detects whether there are circular references in the given entity list and returns a list
-        where the entities are ordered according to the chain of references (and only the entities
-        contained in the circle are included. Returns None if no circular dependency is found.
-
-        TODO: for the sake of detecting problems for split_into_inserts_and_updates we should only
-        consider references that are identifying properties.
-        """
-        circle = [flat[0]]
-        closed = False
-        while not closed:
-            current = circle[-1]
-            added_to_circle = False
-            for p in current.properties:
-                if isinstance(p.value, list):
-                    for pval in p.value:
-                        if pval in flat:
-                            if pval in circle:
-                                closed = True
-                            circle.append(pval)
-                            added_to_circle = True
-                else:
-                    if p.value in flat:
-                        if p.value in circle:
-                            closed = True
-                        circle.append(p.value)
-                        added_to_circle = True
-            if not added_to_circle:
-                return None
-        return circle
-
-    @ staticmethod
+    @staticmethod
     def _merge_properties_from_remote(
             crawled_data: list[db.Record],
             identified_records: list[db.Record]
@@ -958,7 +426,7 @@ class Crawler(object):
 
         return to_be_updated
 
-    @ staticmethod
+    @staticmethod
     def remove_unnecessary_updates(
             crawled_data: list[db.Record],
             identified_records: list[db.Record]
@@ -984,7 +452,7 @@ class Crawler(object):
 
         return actual_updates
 
-    @ staticmethod
+    @staticmethod
     def execute_parent_updates_in_list(to_be_updated, securityMode, run_id, unique_names):
         """
         Execute the updates of changed parents.
@@ -1027,13 +495,13 @@ class Crawler(object):
                             "mode. This might lead to a failure of inserts that follow.")
                 logger.info(parent_updates)
 
-    @ staticmethod
+    @staticmethod
     def _get_property_id_for_datatype(rtname: str, name: str):
         return cached_get_entity_by(
             query=f"FIND Entity '{escape_squoted_text(rtname)}' "
-                  f"with name='{escape_squoted_text(name)}'").id
+            f"with name='{escape_squoted_text(name)}'").id
 
-    @ staticmethod
+    @staticmethod
     def replace_name_with_referenced_entity_id(prop: db.Property):
         """changes the given property in place if it is a reference property that has a name as
         value
@@ -1078,7 +546,7 @@ class Crawler(object):
                     propval.append(el)
             prop.value = propval
 
-    @ staticmethod
+    @staticmethod
     def execute_inserts_in_list(to_be_inserted, securityMode,
                                 run_id: Optional[uuid.UUID] = None,
                                 unique_names=True):
@@ -1098,7 +566,7 @@ class Crawler(object):
                 update_cache = UpdateCache()
                 update_cache.insert(to_be_inserted, run_id, insert=True)
 
-    @ staticmethod
+    @staticmethod
     def set_ids_and_datatype_of_parents_and_properties(rec_list):
         for record in rec_list:
             for parent in record.parents:
@@ -1110,7 +578,7 @@ class Crawler(object):
                     prop.id = entity.id
                     _resolve_datatype(prop, entity)
 
-    @ staticmethod
+    @staticmethod
     def execute_updates_in_list(to_be_updated, securityMode,
                                 run_id: Optional[uuid.UUID] = None,
                                 unique_names=True):
@@ -1124,7 +592,7 @@ class Crawler(object):
                 update_cache = UpdateCache()
                 update_cache.insert(to_be_updated, run_id)
 
-    @ staticmethod
+    @staticmethod
     def check_whether_parent_exists(records: list[db.Entity], parents: list[str]):
         """ returns a list of all records in `records` that have a parent that is in `parents`"""
         problems = []
@@ -1180,7 +648,8 @@ class Crawler(object):
                 "use for example the Scanner to create this data."))
             crawled_data = self.crawled_data
 
-        to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(crawled_data)
+        to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(
+            SyncGraph(crawled_data, self.identifiableAdapter))
 
         for el in to_be_updated:
             # all entity objects are replaced by their IDs except for the not yet inserted ones
@@ -1211,8 +680,10 @@ class Crawler(object):
         if len(ins_problems) > 0 or len(upd_problems) > 0:
             raise ForbiddenTransaction(
                 "One or more Records that have a parent which is excluded from inserts or updates."
-                f"\nRecords excluded from inserts have the following RecordTypes:\n{[el.parents[0].name for el in ins_problems]}"
-                f"\nRecords excluded from updates have the following RecordTypes:\n{[el.parents[0].name for el in upd_problems]}"
+                f"\nRecords excluded from inserts have the following RecordTypes:\n"
+                f"{[el.parents[0].name for el in ins_problems]}"
+                f"\nRecords excluded from updates have the following RecordTypes:\n"
+                f"{[el.parents[0].name for el in upd_problems]}"
             )
 
         logger.info(f"Going to insert {len(to_be_inserted)} Entities and update "
@@ -1221,14 +692,14 @@ class Crawler(object):
             cache_clear()
             self.execute_parent_updates_in_list(to_be_updated, securityMode=self.securityMode,
                                                 run_id=self.run_id, unique_names=unique_names)
-            logger.info(f"Added parent RecordTypes where necessary.")
+            logger.info("Added parent RecordTypes where necessary.")
             self.execute_inserts_in_list(
                 to_be_inserted, self.securityMode, self.run_id, unique_names=unique_names)
-            logger.info(f"Executed inserts:\n"
+            logger.info("Executed inserts:\n"
                         + self.create_entity_summary(to_be_inserted))
             self.execute_updates_in_list(
                 to_be_updated, self.securityMode, self.run_id, unique_names=unique_names)
-            logger.info(f"Executed updates:\n"
+            logger.info("Executed updates:\n"
                         + self.create_entity_summary(to_be_updated))
 
         update_cache = UpdateCache()
@@ -1244,7 +715,7 @@ class Crawler(object):
 
         return (to_be_inserted, to_be_updated)
 
-    @ staticmethod
+    @staticmethod
     def create_entity_summary(entities: list[db.Entity]):
         """ Creates a summary string reprensentation of a list of entities."""
         parents = {}
@@ -1263,7 +734,7 @@ class Crawler(object):
             output = output[:-2] + "\n"
         return output
 
-    @ staticmethod
+    @staticmethod
     def inform_about_pending_changes(pending_changes, run_id, path, inserts=False):
         # Sending an Email with a link to a form to authorize updates is
         if get_config_setting("send_crawler_notifications"):
@@ -1284,7 +755,7 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
                     + " by invoking the crawler"
                     " with the run id: {rid}\n".format(rid=run_id))
 
-    @ staticmethod
+    @staticmethod
     def debug_build_usage_tree(converter: Converter):
         res: dict[str, dict[str, Any]] = {
             converter.name: {
@@ -1361,13 +832,13 @@ def _update_status_record(run_id, n_inserts, n_updates, status):
         cr_rec.get_property('status').value = status
         (cr_rec
             .add_property(db.execute_query(
-                f"FIND Property with name='number_of_inserted_entities'", unique=True).id,
+                "FIND Property with name='number_of_inserted_entities'", unique=True).id,
                 n_inserts)
             .add_property(
-                db.execute_query(f"FIND Property with name='number_of_updated_entities'",
+                db.execute_query("FIND Property with name='number_of_updated_entities'",
                                  unique=True).id, n_updates)
             .add_property(
-                db.execute_query(f"FIND Property with name='finished'",
+                db.execute_query("FIND Property with name='finished'",
                                  unique=True).id, datetime.now().isoformat()))
         cr_rec.update()
 
diff --git a/src/caoscrawler/exceptions.py b/src/caoscrawler/exceptions.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d08cf76fc177407154e38f0eb6aaa47bc863866
--- /dev/null
+++ b/src/caoscrawler/exceptions.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
+# Copyright (C) 2024 Henrik tom Wörden <h.tomwoerden@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+
+class ForbiddenTransaction(Exception):
+    """Thrown if an transactions is needed that is not allowed.
+    For example an update of an entity if the security level is INSERT
+    """
+    pass
+
+
+class MissingReferencingEntityError(Exception):
+    """Thrown if the identifiable requires that some entity references the given entity but there
+    is no such reference """
+
+    def __init__(self, *args, rts=None, **kwargs):
+        self.rts = rts
+        super().__init__(*args, **kwargs)
+
+
+class ImpossibleMergeError(Exception):
+    """Thrown if due to identifying information, two SyncNodes  or two Properties of SyncNodes
+    should be merged, but there is conflicting information that prevents this.
+    """
+
+    def __init__(self, *args, pname, values, **kwargs):
+        self.pname = pname
+        self.values = values
+        super().__init__(*args, **kwargs)
+
+
+class MissingIdentifyingProperty(Exception):
+    """Thrown if a SyncNode does not have the properties required by the corresponding registered
+    identifiable.
+    """
+    pass
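+
+
+# Usage sketch (illustrative arguments; messages and values are assumptions):
+#
+#     raise ImpossibleMergeError("Conflicting property values.",
+#                                pname="voltage", values=(230, 110))
+#     raise MissingReferencingEntityError("Missing backreference.",
+#                                         rts=["Experiment"])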
diff --git a/src/caoscrawler/identifiable.py b/src/caoscrawler/identifiable.py
index cefdf4a0f42b1f610e0712fdefebc2dc3b78d69f..c7312e12addb89c74d406bdc0e63e1e21e07e12a 100644
--- a/src/caoscrawler/identifiable.py
+++ b/src/caoscrawler/identifiable.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # encoding: utf-8
 #
-# This file is a part of the CaosDB Project.
+# This file is a part of the LinkAhead Project.
 #
 # Copyright (C) 2022 Henrik tom Wörden
 #
@@ -20,23 +20,27 @@
 #
 
 from __future__ import annotations
-import linkahead as db
-from datetime import datetime
+
 import json
+import logging
+from datetime import datetime
 from hashlib import sha256
 from typing import Union
-import logging
+
+import linkahead as db
+
+from .exceptions import MissingIdentifyingProperty
+from .sync_node import SyncNode
 
 logger = logging.getLogger(__name__)
 
 
 class Identifiable():
     """
-    The fingerprint of a Record in CaosDB.
+    The fingerprint of a Record in LinkAhead.
 
-    This class contains the information that is used by the CaosDB Crawler to identify Records.
-    On one hand, this can be the ID or a Record or the path of a File.
-    On the other hand, in order to check whether a Record exits in the CaosDB Server, a query can
+    This class contains the information that is used by the LinkAhead Crawler to identify Records.
+    In order to check whether a Record exists in the LinkAhead Server, a query can
     be created using the information contained in the Identifiable.
 
     Parameters
@@ -46,23 +50,22 @@ class Identifiable():
     properties: dict, keys are names of Properties; values are Property values
                 Note, that lists are not checked for equality but are interpreted as multiple
                 conditions for a single Property.
-    path: str, In case of files: The path where the file is stored.
     backrefs: list, TODO future
     """
 
-    def __init__(self, record_id: int = None, path: str = None, record_type: str = None,
+    def __init__(self, record_id: int = None, record_type: str = None,
                  name: str = None, properties: dict = None,
                  backrefs: list[Union[int, str]] = None):
-        if (record_id is None and path is None and name is None
+        if (record_id is None and name is None
                 and (backrefs is None or len(backrefs) == 0)
                 and (properties is None or len(properties) == 0)):
-            raise ValueError("There is no identifying information. You need to add a path or "
-                             "properties or other identifying attributes.")
+            raise ValueError(
+                "There is no identifying information. You need to add "
+                "properties or other identifying attributes.")
         if properties is not None and 'name' in [k.lower() for k in properties.keys()]:
             raise ValueError("Please use the separete 'name' keyword instead of the properties "
                              "dict for name")
         self.record_id = record_id
-        self.path = path
         self.record_type = record_type
         self.name = name
         if name == "":
@@ -81,20 +84,17 @@ class Identifiable():
     def _value_representation(value) -> str:
         """returns the string representation of property values to be used in the hash function
 
-        The string is the path of a File Entity, the CaosDB ID or Python ID of other Entities
-        (Python Id only if there is no CaosDB ID) and the string representation of bool, float, int
-        and str.
+        The string is the LinkAhead ID in case of SyncNode objects (SyncNode objects must have an ID)
+        and the string representation of None, bool, float, int, datetime and str.
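+
+        For example, a SyncNode with ID 12 would be represented as ``"12"`` and the list
+        ``[12, "a"]`` as ``"[12, a]"`` (illustrative values).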
         """
 
         if value is None:
             return "None"
-        elif isinstance(value, db.File):
-            return str(value.path)
-        elif isinstance(value, db.Entity):
+        elif isinstance(value, SyncNode):
             if value.id is not None:
                 return str(value.id)
             else:
-                return "PyID=" + str(id(value))
+                raise RuntimeError("Python Entity (SyncNode) without ID not allowed")
         elif isinstance(value, list):
             return "[" + ", ".join([Identifiable._value_representation(el) for el in value]) + "]"
         elif (isinstance(value, str) or isinstance(value, int) or isinstance(value, float)
@@ -120,27 +120,20 @@ class Identifiable():
         return rec_string
 
     def __eq__(self, other) -> bool:
-        """
-        Identifiables are equal if they belong to the same Record. Since ID and path are on their
-        own enough to identify the Record it is sufficient if those attributes are equal.
-        1. both IDs are set (not None)  -> equal if IDs are equal
-        2. both paths are set (not None)  -> equal if paths are equal
-        3. equal if attribute representations are equal
-        """
+        """ Identifiables are equal if they share the same ID or if the representation is equal """
         if not isinstance(other, Identifiable):
             raise ValueError("Identifiable can only be compared to other Identifiable objects.")
-        elif self.record_id is not None and other.record_id is not None:
+        if self.record_id is not None and other.record_id is not None:
             return self.record_id == other.record_id
-        elif self.path is not None and other.path is not None:
-            return self.path == other.path
         elif self.get_representation() == other.get_representation():
             return True
         else:
             return False
 
     def __repr__(self):
-        pstring = json.dumps(self.properties)
+        """ deterministic text representation of the identifiable """
+        pstring = json.dumps({k: str(v) for k, v in self.properties.items()})
         return (f"{self.__class__.__name__} for RT {self.record_type}: id={self.record_id}; "
-                f"name={self.name}\n\tpath={self.path}\n"
+                f"name={self.name}\n"
                 f"\tproperties:\n{pstring}\n"
                 f"\tbackrefs:\n{self.backrefs}")
diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py
index d95112ee1aec6ca1526c96421a8052282b6ef9a7..517bc97402375a09d9ea3fc48e93db0eb8a87a15 100644
--- a/src/caoscrawler/identifiable_adapters.py
+++ b/src/caoscrawler/identifiable_adapters.py
@@ -2,7 +2,7 @@
 # encoding: utf-8
 #
 # ** header v3.0
-# This file is a part of the CaosDB Project.
+# This file is a part of the LinkAhead Project.
 #
 # Copyright (C) 2021-2022 Henrik tom Wörden
 #               2021-2022 Alexander Schlemmer
@@ -29,7 +29,6 @@ import logging
 import warnings
 from abc import ABCMeta, abstractmethod
 from datetime import datetime
-from functools import lru_cache
 from typing import Any
 
 import linkahead as db
@@ -37,7 +36,9 @@ import yaml
 from linkahead.cached import cached_get_entity_by, cached_query
 from linkahead.utils.escape import escape_squoted_text
 
+from .exceptions import MissingIdentifyingProperty, MissingReferencingEntityError
 from .identifiable import Identifiable
+from .sync_node import SyncNode
 from .utils import has_parent
 
 logger = logging.getLogger(__name__)
@@ -51,7 +52,7 @@ def get_children_of_rt(rtname):
 
 
 def convert_value(value: Any) -> str:
-    """ Return a string representation of the value suitable for the search query.
+    """Return a string representation of the value suitable for the search query.
 
     This is for search queries looking for the identified record.
 
@@ -82,27 +83,27 @@ def convert_value(value: Any) -> str:
 class IdentifiableAdapter(metaclass=ABCMeta):
     """Base class for identifiable adapters.
 
-Some terms:
+    Some terms:
 
-- A *registered identifiable* defines an identifiable template, for example by specifying:
-    - Parent record types
-    - Properties
-    - ``is_referenced_by`` statements
-- An *identifiable* belongs to a concrete record.  It consists of identifying attributes which "fill
-  in" the *registered identifiable*.  In code, it can be represented as a Record based on the
-  *registered identifiable* with all the values filled in.
-- An *identified record* is the result of retrieving a record from the database, based on the
-  *identifiable* (and its values).
+    - A *registered identifiable* defines an identifiable template, for example by specifying:
+        - Parent record types
+        - Properties
+        - ``is_referenced_by`` statements
+    - An *identifiable* belongs to a concrete record.  It consists of identifying attributes which "fill
+      in" the *registered identifiable*.  In code, it can be represented as a Record based on the
+      *registered identifiable* with all the values filled in.
+    - An *identified record* is the result of retrieving a record from the database, based on the
+      *identifiable* (and its values).
 
-General question to clarify:
+    General question to clarify:
 
-- Do we want to support multiple identifiables per RecordType?
-- Current implementation supports only one identifiable per RecordType.
+    - Do we want to support multiple identifiables per RecordType?
+    - Current implementation supports only one identifiable per RecordType.
 
-The list of referenced by statements is currently not implemented.
+    The list of referenced by statements is currently not implemented.
 
-The IdentifiableAdapter can be used to retrieve the three above mentioned objects (registered
-identifiabel, identifiable and identified record) for a Record.
+    The IdentifiableAdapter can be used to retrieve the three above-mentioned objects (registered
+    identifiable, identifiable and identified record) for a Record.
 
     """
 
@@ -127,7 +128,7 @@ identifiabel, identifiable and identified record) for a Record.
             eid = ref
             if isinstance(ref, db.Entity):
                 eid = ref.id
-            query_string += (" WHICH IS REFERENCED BY " + str(eid) + " AND")
+            query_string += " WHICH IS REFERENCED BY " + str(eid) + " AND"
 
         query_string += " WITH "
 
@@ -136,22 +137,81 @@ identifiabel, identifiable and identified record) for a Record.
             if len(ident.properties) > 0:
                 query_string += " AND "
 
-        query_string += IdentifiableAdapter.create_property_query(ident, startswith=startswith)
+        query_string += IdentifiableAdapter.create_property_query(
+            ident, startswith=startswith
+        )
 
         # TODO Can these cases happen at all with the current code?
         if query_string.endswith(" AND WITH "):
-            query_string = query_string[:-len(" AND WITH ")]
+            query_string = query_string[: -len(" AND WITH ")]
         if query_string.endswith(" AND "):
-            query_string = query_string[:-len(" AND ")]
+            query_string = query_string[: -len(" AND ")]
         return query_string
 
+    def all_identifying_properties_exist(
+        self, node: SyncNode, raise_exception: bool = True
+    ):
+        """checks whether all identifying properties exist and raises an error if
+        that's not the case. It furthermore raises an error if "name" is part of
+        the identifiable, but the node does not have a name.
+
+        If raise_exception is False, the function returns False instead of raising an error.
+
+        Backreferences are not checked.
+
+        Returns True if all identifying properties exist.
+
+        Last review by Alexander Schlemmer on 2024-05-24.
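+
+        Example (illustrative): if the registered identifiable requires a property
+        ``date``, a node without a ``date`` property raises MissingIdentifyingProperty,
+        or returns False if ``raise_exception`` is False.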
+        """
+        if node.registered_identifiable is None:
+            if raise_exception:
+                raise RuntimeError("no registered_identifiable")
+            else:
+                return False
+        for prop in node.registered_identifiable.properties:
+            if prop.name.lower() == "is_referenced_by":
+                continue
+            if prop.name.lower() == "name":
+                if node.name is None:
+                    if raise_exception:
+                        i = MissingIdentifyingProperty("The node has no name.")
+                        i.prop = "name"
+                        raise i
+                    else:
+                        return False
+                else:
+                    continue
+
+            # multiple occurrences are ok here. We deal with that when actually creating an
+            # identifiable (IDs of referenced Entities might need to get resolved first).
+            if not any(el.name.lower() == prop.name.lower() for el in node.properties):
+                if raise_exception:
+                    i = MissingIdentifyingProperty(f"The property {prop.name} is missing.")
+                    i.prop = prop.name
+                    raise i
+                else:
+                    return False
+
+        return True
+
     @staticmethod
     def __create_pov_snippet(pname: str, pvalue, startswith: bool = False):
         """Return something like ``'name'='some value'`` or ``'name' LIKE 'some*'``.
 
-If ``startswith`` is True, the value of strings will be cut off at 200 characters and a ``LIKE``
-operator will be used to find entities matching at the beginning.
-"""
+        If ``startswith`` is True, the value of strings will be cut off at 200 characters and a ``LIKE``
+        operator will be used to find entities matching at the beginning.
+        """
         if startswith and isinstance(pvalue, str) and len(pvalue) > 200:
             operator_value_str = f" LIKE '{escape_squoted_text(pvalue[:200])}*'"
         else:
@@ -163,14 +223,14 @@ operator will be used to find entities matching at the beginning.
     def create_property_query(entity: Identifiable, startswith: bool = False):
         """Create a POV query part with the entity's properties.
 
-Parameters
-----------
+        Parameters
+        ----------
 
-entity: Identifiable
-  The Identifiable whose properties shall be used.
+        entity: Identifiable
+          The Identifiable whose properties shall be used.
 
-startswith: bool, optional
-  If True, check string typed properties against the first 200 characters only.  Default is False.
+        startswith: bool, optional
+          If True, check string typed properties against the first 200 characters only.  Default is False.
         """
         query_string = ""
         pov = IdentifiableAdapter.__create_pov_snippet  # Shortcut
@@ -197,7 +257,7 @@ startswith: bool, optional
         return query_string[:-4]
 
     @abstractmethod
-    def get_registered_identifiable(self, record: db.Record):
+    def get_registered_identifiable(self, record: db.Entity):
         """
         Check whether an identifiable is registered for this record and return its definition.
         If there is no identifiable registered, return None.
@@ -210,42 +270,21 @@ startswith: bool, optional
 
     @abstractmethod
     def get_file(self, identifiable: db.File):
-        warnings.warn(DeprecationWarning("This function is deprecated. Please do not use it."))
+        warnings.warn(
+            DeprecationWarning("This function is deprecated. Please do not use it.")
+        )
         """
         Retrieve the file object for a (File) identifiable.
         """
         pass
 
-    @staticmethod
-    def get_identifying_referencing_entities(referencing_entities, registered_identifiable):
-        refs = []
-        for prop in registered_identifiable.properties:
-            if prop.name.lower() != "is_referenced_by":
-                continue
-            for looking_for_rt in prop.value:
-                found = False
-                if looking_for_rt == "*":
-                    for val in referencing_entities.values():
-                        if len(val) > 0:
-                            found = True
-                        refs.extend(val)
-                else:
-                    rt_and_children = get_children_of_rt(looking_for_rt)
-                    for rtname in rt_and_children:
-                        if (rtname in referencing_entities):
-                            refs.extend(referencing_entities[rtname])
-                            found = True
-            if not found:
-                raise RuntimeError(
-                    f"Could not find referencing entities of type(s): {prop.value}\n"
-                    f"for registered identifiable:\n{registered_identifiable}\n"
-                    f"There were {len(referencing_entities)} referencing entities to choose from.\n"
-                    f"This error can also occur in case of merge conflicts in the referencing entities."
-                )
-        return refs
-
     @staticmethod
     def get_identifying_referenced_entities(record, registered_identifiable):
+        """Create a list of all entities that are referenced by record
+           and that are used as identifying properties of the identifiable.
+
+           Last review by Alexander Schlemmer on 2024-05-29.
+        """
         refs = []
         for prop in registered_identifiable.properties:
             pname = prop.name.lower()
@@ -261,83 +300,101 @@ startswith: bool, optional
                     refs.append(val)
         return refs
 
-    def get_identifiable(self, record: db.Record, referencing_entities=None):
+    def get_identifiable(self, se: SyncNode, identifiable_backrefs: set[SyncNode]) -> Identifiable:
         """
-        Retrieve the registered identifiable and fill the property values to create an
-        identifiable.
+        Take the registered identifiable of the given SyncNode ``se`` and fill the property values to
+        create an identifiable.
 
         Args:
-            record: the record for which the Identifiable shall be created.
-            referencing_entities: a dictionary (Type: dict[str, list[db.Entity]]), that
-              allows to look up entities with a certain RecordType, that reference ``record``
+            se: the SyncNode for which the Identifiable shall be created.
+            identifiable_backrefs: a set (Type: set[SyncNode]), that contains SyncNodes
+                                   with a certain RecordType, that reference ``se``
 
         Returns:
             Identifiable, the identifiable for record.
-        """
-
-        registered_identifiable = self.get_registered_identifiable(record)
 
-        if referencing_entities is None:
-            referencing_entities = {}
+        Last review by Alexander Schlemmer on 2024-05-29.
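+
+        Example (illustrative): for a registered identifiable with the properties ``name``
+        and ``date``, the result carries ``name=se.name`` and ``properties={"date": ...}``,
+        where the date value is taken from the corresponding property of ``se``.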
+        """
 
         property_name_list_A = []
-        property_name_list_B = []
         identifiable_props = {}
-        identifiable_backrefs = []
-        name_is_identifying_property = False
-
-        if registered_identifiable is not None:
-            identifiable_backrefs = self.get_identifying_referencing_entities(
-                referencing_entities, registered_identifiable)
-            # fill the values:
-            for prop in registered_identifiable.properties:
-                if prop.name == "name":
-                    # The name can be an identifiable, but it isn't a property
-                    name_is_identifying_property = True
-                    continue
-                # problem: what happens with multi properties?
-                # case A: in the registered identifiable
-                # case B: in the identifiable
-
-                # treated above
-                if prop.name.lower() == "is_referenced_by":
-                    continue
+        name = None
+
+        if se.registered_identifiable is None:
+            raise ValueError("no registered_identifiable")
+
+        # fill the values:
+        for prop in se.registered_identifiable.properties:
+            # TODO:
+            # If there are multiproperties in the registered_identifiable, then only the LAST is
+            # taken into account (later properties overwrite previous ones in the dict below).
+            if prop.name == "name":
+                name = se.name
+                continue
 
-                record_prop = record.get_property(prop.name)
-                if record_prop is None:
-                    # TODO: how to handle missing values in identifiables
-                    #       raise an exception?
-                    # TODO: is this the appropriate error?
-                    raise NotImplementedError(
-                        f"The following record is missing an identifying property:\n"
-                        f"RECORD\n{record}\nIdentifying PROPERTY\n{prop.name}"
+            if prop.name.lower() == "is_referenced_by":
+                for el in identifiable_backrefs:
+                    if not isinstance(el, SyncNode):
+                        raise ValueError("Elements of `identifiable_backrefs` must be SyncNodes")
+                if len(identifiable_backrefs) == 0:
+                    raise MissingReferencingEntityError(
+                        f"Could not find referencing entities of type(s): {prop.value}\n"
+                        f"for registered identifiable:\n{se.registered_identifiable}\n"
+                        f"There were {len(identifiable_backrefs)} referencing entities to "
+                        "choose from.\n"
+                        f"This error can also occur in case of merge conflicts in the referencing"
+                        " entities."
                     )
-                identifiable_props[record_prop.name] = record_prop.value
-                property_name_list_A.append(prop.name)
-
-            # check for multi properties in the record:
-            for prop in property_name_list_A:
-                property_name_list_B.append(prop)
-            if (len(set(property_name_list_B)) != len(property_name_list_B) or len(
-                    set(property_name_list_A)) != len(property_name_list_A)):
-                raise RuntimeError(
-                    "Multi properties used in identifiables could cause unpredictable results and "
-                    "are not allowed. You might want to consider a Property with a list as value.")
+                elif any(e.id is None for e in identifiable_backrefs):
+                    raise RuntimeError("Referencing entity has no id")
+                # At this point we know that there is at least one referencing SyncNode
+                # with an ID. We do not need to set any property value (the reference will be used
+                # in the backrefs argument below) and can thus continue with the next identifying
+                # property
+                continue
+
+            options = [p.value for p in se.properties if p.name.lower() == prop.name.lower()]
+            if len(options) == 0:
+                raise MissingIdentifyingProperty(
+                    f"The following record is missing an identifying property:\n"
+                    f"RECORD\n{se}\nIdentifying PROPERTY\n{prop.name}"
+                )
+            for ii, el in enumerate(options):
+                if isinstance(el, SyncNode):
+                    options[ii] = el.id
+                    if el.id is None:
+                        raise RuntimeError(
+                            "Reference to unchecked in identifiable:\n"
+                            f"{prop.name}:\n{el}"
+                        )
+                else:
+                    options[ii] = el
+            if not all([f == options[0] for f in options]):
+                raise RuntimeError("differing prop values ")
+
+            identifiable_props[prop.name] = options[0]
+            property_name_list_A.append(prop.name)
+
+        # check for multi properties in the record:
+        if len(set(property_name_list_A)) != len(property_name_list_A):
+            raise RuntimeError(
+                "Multi properties used in identifiables could cause unpredictable results and "
+                "are not allowed. You might want to consider a Property with a list as value."
+            )
 
         # use the RecordType of the registered Identifiable if it exists
         # We do not use parents of Record because it might have multiple
         try:
             return Identifiable(
-                record_id=record.id,
-                record_type=(registered_identifiable.parents[0].name
-                             if registered_identifiable else None),
-                name=record.name if name_is_identifying_property else None,
+                record_id=se.id,
+                record_type=se.registered_identifiable.parents[0].name,
+                name=name,
                 properties=identifiable_props,
-                path=record.path,
-                backrefs=identifiable_backrefs
+                backrefs=[e.id for e in identifiable_backrefs],
             )
-        except Exception:
-            logger.error(f"Error while creating identifiable for this record:\n{record}")
+        except Exception as exc:
+            logger.error(exc)
+            logger.error(f"Error while creating identifiable for this record:\n{se}")
             raise
 
     @abstractmethod
@@ -352,7 +409,9 @@ startswith: bool, optional
         """
         pass
 
-    def retrieve_identified_record_for_record(self, record: db.Record, referencing_entities=None):
+    def retrieve_identified_record_for_record(
+        self, record: db.Record, referencing_entities=None
+    ):
         """
         This function combines all functionality of the IdentifierAdapter by
         returning the identifiable after having checked for an appropriate
@@ -366,10 +425,36 @@ startswith: bool, optional
         if record.id is not None:
             return cached_get_entity_by(eid=record.id)
 
-        identifiable = self.get_identifiable(record, referencing_entities=referencing_entities)
+        identifiable = self.get_identifiable(
+            record, referencing_entities=referencing_entities
+        )
 
         return self.retrieve_identified_record_for_identifiable(identifiable)
 
+    @staticmethod
+    def referencing_entity_has_appropriate_type(parents, register_identifiable):
+        """returns true if one of the parents is listed by the 'is_referenced_by' property
+
+        This function also returns True if 'is_referenced_by' contains the wildcard '*'.
+
+        Last review by Alexander Schlemmer on 2024-05-29.
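+
+        Example (illustrative): with ``is_referenced_by = ["Experiment"]`` in the
+        registered identifiable, parents containing ``Experiment`` (or any child
+        RecordType of ``Experiment``) are appropriate.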
+        """
+        if register_identifiable.get_property("is_referenced_by") is None:
+            return False
+        if register_identifiable.get_property("is_referenced_by").value is None:
+            return False
+
+        appropriate_types = []
+        for rt in register_identifiable.get_property("is_referenced_by").value:
+            appropriate_types.extend(get_children_of_rt(rt))
+        appropriate_types = [el.lower() for el in appropriate_types]
+        if "*" in appropriate_types:
+            return True
+        for parent in parents:
+            if parent.name.lower() in appropriate_types:
+                return True
+        return False
+
 
 class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
     """
@@ -377,8 +462,11 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
     """
 
     def __init__(self):
-        warnings.warn(DeprecationWarning(
-            "This class is deprecated. Please use the CaosDBIdentifiableAdapter."))
+        warnings.warn(
+            DeprecationWarning(
+                "This class is deprecated. Please use the CaosDBIdentifiableAdapter."
+            )
+        )
         self._registered_identifiables = dict()
         self._records = []
 
@@ -393,7 +481,9 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
         Just look in records for a file with the same path.
         """
         candidates = []
-        warnings.warn(DeprecationWarning("This function is deprecated. Please do not use it."))
+        warnings.warn(
+            DeprecationWarning("This function is deprecated. Please do not use it.")
+        )
         for record in self._records:
             if record.role == "File" and record.path == identifiable.path:
                 candidates.append(record)
@@ -405,15 +495,18 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
 
     def store_state(self, filename):
         with open(filename, "w") as f:
-            f.write(db.common.utils.xml2str(
-                db.Container().extend(self._records).to_xml()))
+            f.write(
+                db.common.utils.xml2str(db.Container().extend(self._records).to_xml())
+            )
 
     def restore_state(self, filename):
         with open(filename, "r") as f:
             self._records = db.Container().from_xml(f.read())
 
     # TODO: move to super class?
-    def is_identifiable_for_record(self, registered_identifiable: db.RecordType, record: db.Record):
+    def is_identifiable_for_record(
+        self, registered_identifiable: db.RecordType, record: db.Record
+    ):
         """
         Check whether this registered_identifiable is an identifiable for the record.
 
@@ -424,8 +517,7 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
         Return True in that case and False otherwise.
         """
         if len(registered_identifiable.parents) != 1:
-            raise RuntimeError(
-                "Multiple parents for identifiables not supported.")
+            raise RuntimeError("Multiple parents for identifiables not supported.")
 
         if not has_parent(record, registered_identifiable.parents[0].name):
             return False
@@ -435,14 +527,13 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
                 return False
         return True
 
-    def get_registered_identifiable(self, record: db.Record):
+    def get_registered_identifiable(self, record: db.Entity):
         identifiable_candidates = []
         for _, definition in self._registered_identifiables.items():
             if self.is_identifiable_for_record(definition, record):
                 identifiable_candidates.append(definition)
         if len(identifiable_candidates) > 1:
-            raise RuntimeError(
-                "Multiple candidates for an identifiable found.")
+            raise RuntimeError("Multiple candidates for an identifiable found.")
         if len(identifiable_candidates) == 0:
             return None
         return identifiable_candidates[0]
@@ -457,8 +548,9 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
         record is the record from the local database to check against.
         identifiable is the record that was created during the crawler run.
         """
-        if (identifiable.record_type is not None
-                and not has_parent(record, identifiable.record_type)):
+        if identifiable.record_type is not None and not has_parent(
+            record, identifiable.record_type
+        ):
             return False
         for propname, propvalue in identifiable.properties.items():
             prop_record = record.get_property(propname)
@@ -487,21 +579,26 @@ class LocalStorageIdentifiableAdapter(IdentifiableAdapter):
                 candidates.append(record)
         if len(candidates) > 1:
             raise RuntimeError(
-                f"Identifiable was not defined unambigiously. Possible candidates are {candidates}")
+                f"Identifiable was not defined unambigiously. Possible candidates are {candidates}"
+            )
         if len(candidates) == 0:
             return None
         return candidates[0]
 
     def resolve_reference(self, value: db.Record):
         if self.get_registered_identifiable(value) is None:
-            raise NotImplementedError("Non-identifiable references cannot"
-                                      " be used as properties in identifiables.")
+            raise NotImplementedError(
+                "Non-identifiable references cannot"
+                " be used as properties in identifiables."
+            )
             # TODO: just resolve the entity
 
         value_identifiable = self.retrieve_identified_record_for_record(value)
         if value_identifiable is None:
-            raise RuntimeError("The identifiable which is used as property"
-                               " here has to be inserted first.")
+            raise RuntimeError(
+                "The identifiable which is used as property"
+                " here has to be inserted first."
+            )
 
         if value_identifiable.id is None:
             raise RuntimeError("The entity has not been assigned an ID.")
@@ -521,7 +618,7 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
 
     def load_from_yaml_definition(self, path: str):
         """Load identifiables defined in a yaml file"""
-        with open(path, 'r', encoding="utf-8") as yaml_f:
+        with open(path, "r", encoding="utf-8") as yaml_f:
             identifiable_data = yaml.safe_load(yaml_f)
 
         for key, value in identifiable_data.items():
@@ -541,7 +638,9 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
         self._registered_identifiables[name] = definition
 
     def get_file(self, identifiable: Identifiable):
-        warnings.warn(DeprecationWarning("This function is deprecated. Please do not use it."))
+        warnings.warn(
+            DeprecationWarning("This function is deprecated. Please do not use it.")
+        )
         # TODO is this needed for Identifiable?
         # or can we get rid of this function?
         if isinstance(identifiable, db.Entity):
@@ -555,7 +654,7 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
             return None
         return candidates[0]
 
-    def get_registered_identifiable(self, record: db.Record):
+    def get_registered_identifiable(self, record: db.Entity):
         """
         returns the registered identifiable for the given Record
 
@@ -583,9 +682,13 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
         query_string = self.create_query_for_identifiable(identifiable)
         try:
             candidates = cached_query(query_string)
-        except db.exceptions.HTTPServerError as err:
-            query_string = self.create_query_for_identifiable(identifiable, startswith=True)
-            candidates = cached_query(query_string).copy()  # Copy against cache poisoning
+        except db.exceptions.HTTPServerError:
+            query_string = self.create_query_for_identifiable(
+                identifiable, startswith=True
+            )
+            candidates = cached_query(
+                query_string
+            ).copy()  # Copy against cache poisoning
 
             # Test if the candidates really match all properties
             for pname, pvalue in identifiable.properties.items():
@@ -604,7 +707,8 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
             raise RuntimeError(
                 f"Identifiable was not defined unambiguously.\n{query_string}\nReturned the "
                 f"following {candidates}."
-                f"Identifiable:\n{identifiable.record_type}{identifiable.properties}")
+                f"Identifiable:\n{identifiable.record_type}{identifiable.properties}"
+            )
         if len(candidates) == 0:
             return None
         return candidates[0]
diff --git a/src/caoscrawler/scanner.py b/src/caoscrawler/scanner.py
index 9d1f538732858ff2fbf949d45c359ebb16fe3480..fe8a6dbfdd7de93c84e8cabd28a9c0dae8b8468a 100644
--- a/src/caoscrawler/scanner.py
+++ b/src/caoscrawler/scanner.py
@@ -362,16 +362,19 @@ def scanner(items: list[StructureElement],
                     debug_tree.debug_metadata["usage"][str(element)].add(
                         "/".join(converters_path + [converter.name]))
                     mod_info = debug_tree.debug_metadata["provenance"]
-                    for record_name, prop_name in keys_modified:
-                        # TODO: check
-                        internal_id = record_store_copy.get_internal_id(
-                            record_name)
-                        record_identifier = record_name + \
-                            "_" + str(internal_id)
-                        converter.metadata["usage"].add(record_identifier)
-                        mod_info[record_identifier][prop_name] = (
-                            structure_elements_path + [element.get_name()],
-                            converters_path + [converter.name])
+                    # TODO: actually keys_modified must not be None. create_records should
+                    #       always return a list.
+                    if keys_modified is not None:
+                        for record_name, prop_name in keys_modified:
+                            # TODO: check
+                            internal_id = record_store_copy.get_internal_id(
+                                record_name)
+                            record_identifier = record_name + \
+                                "_" + str(internal_id)
+                            converter.metadata["usage"].add(record_identifier)
+                            mod_info[record_identifier][prop_name] = (
+                                structure_elements_path + [element.get_name()],
+                                converters_path + [converter.name])
 
                 scanner(children, converter.converters,
                         general_store_copy, record_store_copy,
diff --git a/src/caoscrawler/sync_graph.py b/src/caoscrawler/sync_graph.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c021a10f35e95ca56d45151b8d064ec905993ec
--- /dev/null
+++ b/src/caoscrawler/sync_graph.py
@@ -0,0 +1,719 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 Henrik tom Wörden
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+
+"""
+A data model class for the graph of entities that shall be created during synchronization of the
+crawler.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+from typing import Any, Callable, Optional, Union
+
+import linkahead as db
+from linkahead.cached import cached_get_entity_by
+from linkahead.exceptions import EmptyUniqueQueryError
+
+from .identifiable import Identifiable
+from .identifiable_adapters import IdentifiableAdapter
+from .sync_node import SyncNode, TempID
+
+logger = logging.getLogger(__name__)
+
+
+def _set_each_scalar_value(
+    node: SyncNode, condition: Callable[[Any], bool], value: Callable[[Any], Any]
+):
+    """helper function that conditionally replaces each value element of each property of a node
+
+    If the property value is a list, the replacement is done for each list entry.
+    The replacement is only performed if the condition that
+    is provided is fulfilled, i.e. the callable ``condition`` returns True. The callable
+    ``condition`` must take the property value (or list element) as the sole argument.
+
+    Args:
+        node (SyncNode): The node which provides the properties (and their values) to operate on.
+        condition (Callable): A function with one argument which is interpreted as a condition:
+                              Only if it returns True for the property value, the action is
+                              executed.
+        value (Callable): A function returning a new value that is set as the property value. This
+                          function receives the old value as the single argument.
+
+    Last review by Alexander Schlemmer on 2024-05-24.
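+
+    Example (illustrative, assuming ``node`` is a SyncNode and ``node_map`` maps Python
+    IDs to replacement objects, as in ``export_record_lists``):
+
+    >>> _set_each_scalar_value(node,
+    ...                        condition=lambda val: isinstance(val, SyncNode),
+    ...                        value=lambda val: node_map[id(val)])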
+    """
+    for p in node.properties:
+        if isinstance(p.value, list):
+            for ii, el in enumerate(p.value):
+                if condition(el):
+                    p.value[ii] = value(el)
+        elif condition(p.value):
+            p.value = value(p.value)
+
+
+class SyncGraph:
+    """
+    A data model class for the graph of entities that shall be created during synchronization of
+    the crawler.
+
+    The SyncGraph combines nodes in the graph based on their identity in order to create a graph of
+    objects that can either be inserted into or updated in the remote server. This combination of
+    SyncNodes happens during initialization and later on when the ID of SyncNodes is set.
+
+    When the SyncGraph is initialized, the properties of given entities are scanned and used to
+    create multiple reference maps that track how SyncNodes reference each other.
+    These maps are kept up to date when SyncNodes are merged because they are identified with each
+    other. During initialization, SyncNodes are first merged based on their ID, path or
+    identifiable.
+
+    When additional information is added to the graph by setting the ID of a node
+    (via `set_id_of_node`) then the graph is updated accordingly:
+    - if this information implies that the node is equivalent to another node (e.g. has same ID),
+      then they are merged
+    - if it is known that one node does not exist on the remote server, this might imply that some
+      other node does not exist either, namely if its identity relies on the former.
+    - The new ID might make it possible to create the identifiables of connected nodes and thus
+      might trigger further merging of nodes based on the new identifiables.
+
+    A SyncGraph should only be manipulated via one function:
+    - set_id_of_node: a positive integer means the Entity exists, None means it is missing
+    TODO what about String IDs
+
+    The SyncGraph can be converted back to lists of entities which allow performing the desired
+    inserts and updates.
+
+    Usage:
+    - Initialize the Graph with a list of entities. Those will be converted to the SyncNodes of the
+      graph.
+    - SyncNodes that can be merged are automatically merged and SyncNodes whose existence can
+      be determined are automatically removed from the list of unchecked SyncNodes:
+      graph.unchecked.
+    - You manipulate the graph by setting the ID of a SyncNode (either to a valid ID or to None).
+      For example, you can check whether a SyncNode has an identifiable and then query the remote
+      server and use the result to set the ID.
+    - After each manipulation, the graph updates accordingly (see above)
+    - Ideally, the unchecked list is empty after some manipulation.
+    - You can export a list of entities to be inserted and one of entities to be updated with
+      export_record_lists.
+
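+    A minimal sketch of this workflow (``entities`` and ``ident_adapter`` are
+    hypothetical; error handling and merge effects are omitted):
+
+    >>> graph = SyncGraph(entities, ident_adapter)
+    >>> while graph.unchecked:
+    ...     node = next(n for n in graph.unchecked if n.identifiable is not None)
+    ...     identified = ident_adapter.retrieve_identified_record_for_identifiable(
+    ...         node.identifiable)
+    ...     graph.set_id_of_node(node, identified.id if identified is not None else None)
+    >>> to_insert, to_update = graph.export_record_lists()
+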
+    Last review by Alexander Schlemmer on 2024-05-24.
+    """
+
+    # General implementation remark:
+    # There are three cases where an update of one SyncNode can affect other nodes:
+    # - mark existing (add identifiables)
+    # - mark missing (add identifiables and add (negative) IDs)
+    # - merge (add identifiables)
+    #
+    # We cannot get an infinite recursion where one update triggers another update and so on
+    # because updates are conditional:
+    # Setting an ID removes the node (immediately) from the unchecked list, and an ID is only set
+    # in _mark_missing if a node is in the unchecked list. Thus, setting the ID once
+    # prevents future attempts to set the ID of the same node.
+    # Also, setting an identifiable is only done when needed, i.e. there is no identifiable.
+    # Note, that when ever one node is changed, we check all dependent nodes (see usage of
+    # `_get_nodes_whose_identity_relies_on`) whether something should be updated. Thus, we cannot
+    # miss a necessary update.
+    def __init__(
+        self, entities: list[db.Entity], identifiableAdapter: IdentifiableAdapter
+    ):
+        self.identifiableAdapter = identifiableAdapter
+        # A dictionary allowing for quick lookup of sync nodes using their (possibly negative) IDs.
+        # This dictionary is initially set using _mark_entities_with_path_or_id and later updated
+        # using set_id_of_node or during merges of nodes.
+        self._id_look_up: dict[Union[int, TempID, str], SyncNode] = {}
+        # Similar as above for looking up nodes using paths
+        self._path_look_up: dict[str, SyncNode] = {}
+        # Similar as above for looking up nodes using identifiables. This dictionary uses the text
+        # representation generated by get_representation method of Identifiable as keys.
+        self._identifiable_look_up: dict[str, SyncNode] = {}
+        # look up for the nodes that were marked as being missing (on the remote server)
+        self._missing: dict[int, SyncNode] = {}
+        # same for existing
+        self._existing: dict[int, SyncNode] = {}
+        # entities that are missing get negative IDs to allow identifiable creation
+        self._remote_missing_counter = -1
+
+        self.nodes: list[SyncNode] = []
+        self._initialize_nodes(entities)  # converts entities to SyncNodes (all nodes of the graph)
+        # list of all SyncNodes that have not yet been checked
+        self.unchecked = list(self.nodes)
+
+        # initialize reference mappings (see _create_reference_mapping)
+        (
+            self.forward_references,  # id(node) -> full set of nodes referenced by the given node
+            self.backward_references,  # id(node) -> full set of nodes referencing the given node
+            # as above, subset where the reference properties are part of identifiables
+            self.forward_references_id_props,
+            self.backward_references_id_props,
+            # as above, subset where references are part of identifiables due to "referenced_by"
+            self.forward_references_backref,
+            self.backward_references_backref,
+        ) = self._create_reference_mapping(self.nodes)
+
+        # remove entities with path or ID from unchecked list
+        self._mark_entities_with_path_or_id()
+
+        # add identifiables where possible
+        for node in list(self.nodes):
+            if self._identifiable_is_needed(node):
+                self._set_identifiable_of_node(node)
+
+        # Everything in unchecked has neither an ID nor a path.
+        # Thus, it must be possible to create an identifiable for it,
+        # which is checked using the following function:
+        for node in self.unchecked:
+            self.identifiableAdapter.all_identifying_properties_exist(node)
+
+    def set_id_of_node(self, node: SyncNode, node_id: Optional[Union[int, str]] = None):
+        """sets the ID attribute of the given SyncNode to node_id.
+
+        If node_id is None, a negative (temporary) ID will be
+        assigned, indicating that the node does not exist on the remote server.
+        Furthermore, it will be marked as missing using _mark_missing.
+
+        Last review by Alexander Schlemmer on 2024-05-24.
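+
+        Example (illustrative): ``graph.set_id_of_node(node, 1234)`` marks ``node`` as
+        existing with ID 1234, whereas ``graph.set_id_of_node(node)`` assigns a negative
+        temporary ID and marks it as missing.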
+        """
+        if node.id is not None:
+            raise RuntimeError(
+                "Cannot update ID.\n"
+                f"It already is {node.id} and shall be set to {node_id}."
+            )
+        if node_id is None:
+            node_id = TempID(self._get_new_id())
+        node.id = node_id
+        if node_id in self._id_look_up:
+            self._merge_into(node, self._id_look_up[node.id])
+        else:
+            self._id_look_up[node.id] = node
+            if isinstance(node.id, TempID):
+                self._mark_missing(node)
+            else:
+                self._mark_existing(node)
+
+    def export_record_lists(self):
+        """exports the SyncGraph in form of db.Entities
+
+        All nodes are converted to db.Entity objects and reference values that are SyncNodes are
+        replaced by their corresponding (newly created) db.Entity objects.
+
+        Since the result is returned in the form of two lists, one with Entities that have a valid
+        ID and one with those that do not, an error is raised if there are any SyncNodes without a
+        (possibly negative) ID.
+
+        Last review by Alexander Schlemmer on 2024-05-24.
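+
+        Sketch of how a caller might use the result (hypothetical; the actual insert and
+        update logic of the crawler is more involved):
+
+        >>> to_insert, to_update = graph.export_record_lists()
+        >>> db.Container().extend(to_insert).insert()
+        >>> db.Container().extend(to_update).update()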
+        """
+        # TODO reactivate once the implementation is appropriate
+        # if len(self.unchecked) > 1:
+        #     self.unchecked_contains_circular_dependency()
+
+        for el in self.nodes:
+            if el.id is None:
+                raise RuntimeError("Exporting unchecked entities is not supported")
+
+        entities = []
+        node_map = {}
+        for el in self.nodes:
+            entities.append(el.export_entity())
+            node_map[id(el)] = entities[-1]
+
+        for ent in entities:
+            _set_each_scalar_value(
+                ent,
+                condition=lambda val: isinstance(val, SyncNode),
+                value=lambda val: node_map[id(val)],
+            )
+
+        missing = [el for el in entities if el.id < 0]
+        existing = [el for el in entities if el.id > 0]
+        # remove negative IDs
+        for el in missing:
+            el.id = None
+
+        return (missing, existing)
+
+    def _identity_relies_on_unchecked_entity(self, node: SyncNode):
+        """
+        Returns True if a record, for which it could not yet be verified whether it exists in
+        LinkAhead or not, is part of the identifying properties; otherwise False.
+
+        Last review by Alexander Schlemmer on 2024-05-27.
+        """
+
+        return any(
+            [
+                id(ent) not in self._missing and id(ent) not in self._existing
+                for ent in self.forward_references_id_props[id(node)]
+            ]
+            + [
+                id(ent) not in self._missing and id(ent) not in self._existing
+                for ent in self.backward_references_backref[id(node)]
+            ]
+        )
+
+    def unchecked_contains_circular_dependency(self):
+        """
+        Detects whether there are circular references in the given entity list and returns a list
+        where the entities are ordered according to the chain of references (only the entities
+        contained in the circle are included). Returns None if no circular dependency is found.
+
+        TODO: for the sake of detecting problems for split_into_inserts_and_updates we should only
+        consider references that are identifying properties.
+        """
+        raise NotImplementedError("This function is not yet properly implemented")
+        # TODO if the first element is not part of the circle, then
+        # this will not work
+        # We must create a better implementation (see also TODO in docstring)
+        circle = [self.unchecked[0]]
+        closed = False
+        while not closed:
+            added_to_circle = False
+            for referenced in self.forward_references[id(circle[-1])]:
+                if referenced in self.unchecked:
+                    if referenced in circle:
+                        closed = True
+                    circle.append(referenced)
+                    added_to_circle = True
+            if not added_to_circle:
+                return None
+        return circle
+
+    def get_equivalent(self, entity: SyncNode) -> Optional[SyncNode]:
+        """
+        Return an equivalent SyncNode.
+
+        Equivalent means that ID, path or identifiable are the same.
+        If new information was added to the given SyncNode (e.g. the ID), it might then be
+        possible to identify an equivalent node (i.e. one with the same ID in this example).
+        There might be more than one equivalent node in the graph; however, simply the first
+        one found is returned. (When an equivalent node is found, the given node is
+        typically merged into the one that was found, and after the merge the graph is again
+        checked for equivalent nodes.)
+
+        Returns None if no equivalent node is found.
+
+        Last review by Alexander Schlemmer on 2024-05-28.
+        """
+        if entity.id is not None and entity.id in self._id_look_up:
+            candidate = self._id_look_up[entity.id]
+            if candidate is not entity:
+                return candidate
+        if entity.path is not None and entity.path in self._path_look_up:
+            candidate = self._path_look_up[entity.path]
+            if candidate is not entity:
+                return candidate
+        if (
+            entity.identifiable is not None
+            and entity.identifiable.get_representation() in self._identifiable_look_up
+        ):
+            candidate = self._identifiable_look_up[
+                entity.identifiable.get_representation()
+            ]
+            if candidate is not entity:
+                return candidate
+        return None
+
+    def _get_new_id(self):
+        """returns the next unused temporary ID
+
+        Last review by Alexander Schlemmer on 2024-05-24.
+        """
+        self._remote_missing_counter -= 1
+        return self._remote_missing_counter
+
+    def _set_identifiable_of_node(
+        self, node: SyncNode, identifiable: Optional[Identifiable] = None
+    ):
+        """sets the identifiable and checks whether an equivalent node can be found with that new
+        information. If an equivalent node is found, 'node' is merged into that node.
+
+        If no identifiable is given, the identifiable is retrieved from the identifiable adapter.
+
+        Raises a ValueError if the equivalent node found does not have an identifiable.
+        Raises a RuntimeError if there is no equivalent node found and
+          the (unique) string representation of the identifiable of node is already contained in
+          the identifiable_look_up.
+
+        Last review by Alexander Schlemmer on 2024-05-29.
+        """
+        if identifiable is None:
+            self.identifiableAdapter.all_identifying_properties_exist(node)
+            identifiable = self.identifiableAdapter.get_identifiable(
+                node, self.backward_references_backref[id(node)]
+            )
+        node.identifiable = identifiable
+        equivalent_se = self.get_equivalent(node)
+        if equivalent_se is not None:
+            self._merge_into(node, equivalent_se)
+        else:
+            if node.identifiable.get_representation() in self._identifiable_look_up:
+                raise RuntimeError("Identifiable is already in the look up")
+            self._identifiable_look_up[node.identifiable.get_representation()] = node
+
+    @staticmethod
+    def _sanity_check(entities: list[db.Entity]):
+        """
+        Checks whether each record in entities has at least one parent.
+
+        Last review by Alexander Schlemmer on 2024-05-24.
+        """
+        for ent in entities:
+            if ent.role == "Record" and len(ent.parents) == 0:
+                raise ValueError(f"Records must have a parent.\n{ent}")
+            if isinstance(ent.id, int) and ent.id < 0:
+                raise ValueError(
+                    f"Records must not have negative integers as IDs.\n{ent}"
+                )
+            if isinstance(ent.id, str) and re.match(r"^-\d+$", ent.id):
+                raise ValueError(
+                    f"Records must not have negative integers as IDs.\n{ent}"
+                )
+
+    def _get_nodes_whose_identity_relies_on(self, node: SyncNode):
+        """returns a set of nodes that reference the given node as identifying property or are
+        referenced by the given node and the parent of the given node is listed as
+        "is_referenced_by"
+
+        Last review by Alexander Schlemmer on 2024-05-24.
+        """
+        return self.backward_references_id_props[id(node)].union(
+            self.forward_references_backref[id(node)]
+        )
+
+    @staticmethod
+    def _create_flat_list(
+        ent_list: list[db.Entity], flat: Optional[list[db.Entity]] = None
+    ):
+        """
+        Recursively adds entities and all their properties contained in ent_list to
+        the output list flat.
+
+        TODO: This function will be moved to pylib as it is also needed by the
+              high level API.
+
+        Last review by Alexander Schlemmer on 2024-05-29.
+        """
+        # Note: A set would be useful here, but we do not want a random order.
+        if flat is None:
+            flat = list()
+        for el in ent_list:
+            if el not in flat:
+                flat.append(el)
+        for ent in ent_list:
+            for p in ent.properties:
+                # For lists append each element that is of type Entity to flat:
+                if isinstance(p.value, list):
+                    for el in p.value:
+                        if isinstance(el, db.Entity):
+                            if el not in flat:
+                                flat.append(el)
+                                SyncGraph._create_flat_list([el], flat)
+                elif isinstance(p.value, db.Entity):
+                    if p.value not in flat:
+                        flat.append(p.value)
+                        SyncGraph._create_flat_list([p.value], flat)
+        return flat
+
+    @staticmethod
+    def _create_reference_mapping(flat: list[SyncNode]):
+        """
+        Create six dictionaries that describe references among SyncNodes. All dictionaries use the
+        Python ID of SyncNodes as keys.
+        There is always one dictionary that describes the direction of the reference, i.e.
+        map[id(node)] -> other, where other is the set of SyncNodes that are referenced by node,
+        and one dictionary for the inverse direction. The names of the two dictionaries carry
+        the prefixes "forward_" and "backward_", respectively.
+
+        Then there are three kinds of maps being generated: One includes all references
+        ("_references"), one includes references that are values of identifying properties
+        ("_references_id_props") and one includes references that are relevant for identifying
+        backreferences/"is_referenced_by" ("_references_backref"). I.e. the two latter are
+        subsets of the former reference map.
+
+        Arguments:
+        ----------
+           flat: list[SyncNode]
+                 all SyncNodes that span the graph for which the reference map shall be created
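+
+        An illustrative sketch of the returned structure, assuming ``node_a`` references
+        ``node_b`` via some property:
+
+            forward_references[id(node_a)] == {node_b}
+            backward_references[id(node_b)] == {node_a}
+            # the "_id_props" and "_backref" maps hold the subsets of these references
+            # that stem from identifying properties and backreferences, respectively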
+
+        Last review by Alexander Schlemmer on 2024-05-29.
+        """
+        # TODO we need to treat children of RecordTypes somehow.
+        forward_references: dict[int, set[SyncNode]] = {}
+        backward_references: dict[int, set[SyncNode]] = {}
+        forward_references_id_props: dict[int, set[SyncNode]] = {}
+        backward_references_id_props: dict[int, set[SyncNode]] = {}
+        forward_references_backref: dict[int, set[SyncNode]] = {}
+        backward_references_backref: dict[int, set[SyncNode]] = {}
+
+        # initialize the reference maps with empty sets
+        for node in flat:
+            forward_references[id(node)] = set()
+            backward_references[id(node)] = set()
+            forward_references_id_props[id(node)] = set()
+            backward_references_id_props[id(node)] = set()
+            forward_references_backref[id(node)] = set()
+            backward_references_backref[id(node)] = set()
+        for node in flat:
+            for p in node.properties:
+                val = p.value
+                if not isinstance(val, list):
+                    val = [val]
+                for v in val:
+                    if isinstance(v, SyncNode):
+                        forward_references[id(node)].add(v)
+                        backward_references[id(v)].add(node)
+                        if (
+                            node.registered_identifiable is not None
+                            and len(
+                                [
+                                    el.name
+                                    for el in node.registered_identifiable.properties
+                                    if el.name == p.name
+                                ]
+                            )
+                            > 0
+                        ):
+                            forward_references_id_props[id(node)].add(v)
+                            backward_references_id_props[id(v)].add(node)
+                        if (
+                            v.registered_identifiable is not None
+                            and IdentifiableAdapter.referencing_entity_has_appropriate_type(
+                                node.parents, v.registered_identifiable
+                            )
+                        ):
+                            forward_references_backref[id(node)].add(v)
+                            backward_references_backref[id(v)].add(node)
+
+        return (
+            forward_references,
+            backward_references,
+            forward_references_id_props,
+            backward_references_id_props,
+            forward_references_backref,
+            backward_references_backref,
+        )
+
+    def _mark_entities_with_path_or_id(self):
+        """A path or an ID is sufficiently identifying. Thus, entities with a path or an ID can
+        be marked as checked.
+
+        When this function returns, there is only one node for each ID (i.e. no two nodes with the
+        same ID). The same is true for paths.
+
+        This function also updates _id_look_up and _path_look_up.
+
+        Last review by Alexander Schlemmer on 2024-05-29.
+        """
+        for node in list(self.nodes):
+            if node.id is not None:
+                eq_node = self.get_equivalent(node)
+                if eq_node is not None:
+                    self._basic_merge_into(node, eq_node)
+                else:
+                    self._id_look_up[node.id] = node
+                    self._mark_existing(node)
+
+        for node in list(self.nodes):
+            if node.path is not None:
+                eq_node = self.get_equivalent(node)
+                if eq_node is not None:
+                    self._basic_merge_into(node, eq_node)
+                else:
+                    self._path_look_up[node.path] = node
+                    try:
+                        existing = cached_get_entity_by(path=node.path)
+                    except EmptyUniqueQueryError:
+                        existing = None
+                    remote_id = None
+                    if existing is not None:
+                        remote_id = existing.id
+                    self.set_id_of_node(node, remote_id)
+
+    def _basic_merge_into(self, source: SyncNode, target: SyncNode):
+        """tries to merge source into target and updates the member variables:
+
+        - reference maps are updated
+        - self.nodes is updated
+        - self.unchecked is updated
+        - lookups are updated
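+
+        Illustrative sketch: after ``graph._basic_merge_into(a, b)`` (names are hypothetical),
+        all references that previously pointed to ``a`` point to ``b``, and ``a`` is no longer
+        contained in ``graph.nodes``.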
+        """
+        # sanity checks
+        if source is target:
+            raise ValueError("source must not be target")
+
+        target.update(source)
+
+        # replace actual reference property values
+        for node in self.backward_references[id(source)]:
+            _set_each_scalar_value(
+                node, condition=lambda val: val is source, value=lambda val: target
+            )
+
+        # update reference mappings
+        for setA, setB in (
+            (self.forward_references, self.backward_references),  # ref: source -> other
+            (self.backward_references, self.forward_references),  # ref: other -> source
+            (self.forward_references_id_props, self.backward_references_id_props),
+            (self.backward_references_id_props, self.forward_references_id_props),
+            (self.forward_references_backref, self.backward_references_backref),
+            (self.backward_references_backref, self.forward_references_backref),
+        ):
+            for node in setA.pop(id(source)):
+                setA[id(target)].add(node)
+                setB[id(node)].remove(source)
+                setB[id(node)].add(target)
+
+        # remove unneeded SyncNode
+        self.nodes.remove(source)
+        if source in self.unchecked:
+            self.unchecked.remove(source)
+        # update look ups
+        if target.id is not None:
+            self._id_look_up[target.id] = target
+        if target.path is not None:
+            self._path_look_up[target.path] = target
+        if target.identifiable is not None:
+            self._identifiable_look_up[target.identifiable.get_representation()] = target
+
+    def _merge_into(self, source: SyncNode, target: SyncNode):
+        """tries to merge source into target and performs the necessary updates:
+        - update the member variables of target using source (``target.update(source)``).
+        - replaces reference values to source by target
+        - updates the reference map
+        - updates lookup tables
+        - removes source from node lists
+        - marks target as missing/existing if source was marked that way
+        - adds an identifiable if now possible (e.g. merging based on ID might allow creating an
+          identifiable when neither of the two nodes had sufficient properties on its own before)
+        - checks whether dependent nodes can now get an identifiable (the merge might have set the
+          ID such that dependent nodes can now create an identifiable)
+
+        Last review by Alexander Schlemmer on 2024-05-29.
+        """
+        self._basic_merge_into(source, target)
+
+        if (id(source) in self._existing and id(target) in self._missing) or (
+            id(target) in self._existing and id(source) in self._missing
+        ):
+            raise RuntimeError("Trying to merge missing and existing")
+
+        if id(source) in self._missing and id(target) not in self._missing:
+            self._mark_missing(target)
+        elif id(source) in self._existing and id(target) not in self._existing:
+            self._mark_existing(target)
+
+        # due to the merge it might now be possible to create an identifiable
+        if self._identifiable_is_needed(target):
+            self._set_identifiable_of_node(target)
+        # This is one of three cases that affect other nodes:
+        # - mark existing
+        # - mark missing
+        # - merge
+        self._add_identifiables_to_dependent_nodes(target)
+
+        eq_node = self.get_equivalent(target)
+        if eq_node is not None:
+            self._merge_into(target, eq_node)
+
+    def _identifiable_is_needed(self, node: SyncNode):
+        """
+        This function checks whether:
+        - the identifiable of node is None,
+        - the node has all properties that are needed for the identifiable, and
+        - there are no unchecked entities that are needed for the identifiable of the node,
+          neither as forward nor as backward references.
+
+        Last review by Alexander Schlemmer on 2024-05-24.
+        """
+        return (
+            node.identifiable is None
+            and not self._identity_relies_on_unchecked_entity(node)
+            and self.identifiableAdapter.all_identifying_properties_exist(
+                node, raise_exception=False
+            )
+        )
+
+    def _initialize_nodes(self, entities: list[db.Entity]):
+        """create initial set of SyncNodes from provided Entity list"""
+        self._sanity_check(entities)
+        entities = self._create_flat_list(entities)
+        se_lookup: dict[int, SyncNode] = {}  # lookup: python id -> SyncNode
+
+        # Create new sync nodes from the list of entities, their registered identifiables
+        # are set from the identifiable adapter.
+        for el in entities:
+            self.nodes.append(
+                SyncNode(el, self.identifiableAdapter.get_registered_identifiable(el))
+            )
+            se_lookup[id(el)] = self.nodes[-1]
+
+        # replace db.Entity objects with SyncNodes in references:
+        for node in self.nodes:
+            _set_each_scalar_value(
+                node,
+                condition=lambda val: id(val) in se_lookup,
+                value=lambda val: se_lookup[id(val)],
+            )
+
+    def _add_identifiables_to_dependent_nodes(self, node):
+        """For each dependent node, we check whether it is now possible to create an identifiable.
+
+        Last review by Alexander Schlemmer on 2024-05-29.
+        """
+        for other_node in self._get_nodes_whose_identity_relies_on(node):
+            if self._identifiable_is_needed(other_node):
+                self._set_identifiable_of_node(other_node)
+
+    def _mark_missing(self, node: SyncNode):
+        """Mark a sync node as missing and remove it from the dictionary of unchecked nodes.
+
+        Last review by Alexander Schlemmer on 2024-05-24.
+        """
+        self._missing[id(node)] = node
+        self.unchecked.remove(node)
+
+        # This is one of three cases that affect other nodes:
+        # - mark existing
+        # - mark missing
+        # - merge
+        self._add_identifiables_to_dependent_nodes(node)
+        # For each dependent node, we set the ID to None (missing)
+        # (None is the default second argument of set_id_of_node.)
+        for other_node in self._get_nodes_whose_identity_relies_on(node):
+            if other_node in self.unchecked:
+                self.set_id_of_node(other_node)
+
+    def _mark_existing(self, node: SyncNode):
+        """Mark a sync node as existing and remove it from the dictionary of unchecked nodes.
+
+        Last review by Alexander Schlemmer on 2024-05-24.
+        """
+        if isinstance(node.id, TempID):
+            raise ValueError("The ID must belong to a valid existing entity, not be a TempID")
+        self._existing[id(node)] = node
+        self.unchecked.remove(node)
+        # This is one of three cases that affect other nodes:
+        # - mark existing
+        # - mark missing
+        # - merge
+        self._add_identifiables_to_dependent_nodes(node)
diff --git a/src/caoscrawler/sync_node.py b/src/caoscrawler/sync_node.py
new file mode 100644
index 0000000000000000000000000000000000000000..141e743bffa09f0caf661bcd1939a4233cb7249c
--- /dev/null
+++ b/src/caoscrawler/sync_node.py
@@ -0,0 +1,267 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 Henrik tom Wörden
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Any, Optional, Union
+
+import linkahead as db
+import yaml
+from linkahead.common.models import Parent, _ParentList, _Properties
+from warnings import warn
+
+from .exceptions import ImpossibleMergeError
+
+if TYPE_CHECKING:
+    from .identifiable import Identifiable
+
+logger = logging.getLogger(__name__)
+
+
+class TempID(int):
+    """A special kind of int for negative temporary IDs.
+
+    This allows TempIDs to be identified even in the presence of string IDs.
+    A string ID might look like a negative integer.
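+
+    Example (illustrative): a TempID behaves like a normal int, but its type can be checked:
+
+        TempID(-3) == -3                 # True
+        isinstance(TempID(-3), TempID)   # True
+        isinstance(-3, TempID)           # False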
+    """
+    pass
+
+
+class SyncNode(db.Entity):
+    """represents the information of an Entity as it shall be created in LinkAhead
+
+    The following information is taken from a db.Entity object during initialization or when the
+    object is updated using the `update` member function:
+    - id
+    - role
+    - path
+    - file
+    - name
+    - description
+    - parents
+    - properties
+
+    Typically, this class is used in the following way:
+    1. A SyncNode is initialized with a db.Entity object.
+    2. The SyncNode object is possibly updated one or more times with other SyncNode objects.
+    3. A db.Entity object is created (`export_entity`) that contains the combined information.
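+
+    A minimal sketch of this workflow (``entity``, ``other_node`` and
+    ``registered_identifiable`` are assumed to exist):
+
+        node = SyncNode(entity, registered_identifiable)
+        node.update(other_node)        # merge information from an equivalent node
+        merged = node.export_entity()  # db.Entity with the combined information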
+    """
+
+    def __init__(
+        self, entity: db.Entity, registered_identifiable: Optional[db.RecordType] = None,
+        **kwargs
+    ):
+        super().__init__(name=entity.name,
+                         id=entity.id,
+                         description=entity.description,
+                         **kwargs)
+        # db.Entity properties
+        self.role = entity.role
+        self.path = entity.path
+        self.file = entity.file
+        self.parents = _ParentList().extend(entity.parents)
+        self.properties = _Properties().extend(entity.properties)
+        self._check_for_multiproperties()
+        # other members
+        self.identifiable: Optional[Identifiable] = None
+        self.registered_identifiable = registered_identifiable
+
+    def update(self, other: SyncNode) -> None:
+        """update this node with information of given ``other`` SyncNode.
+
+        Parents are added if they are not yet in the list.
+        Properties are added in any case; this may lead to duplicated properties.
+        We allow this duplication here and remove it when we create a db.Entity (export_entity
+        function), because property values that are SyncNode objects might not be comparable
+        yet (no ID, no identifiable).
+        """
+
+        if other.identifiable is not None and self.identifiable is not None:
+            if (
+                other.identifiable.get_representation()
+                != self.identifiable.get_representation()
+            ):
+                raise ValueError(
+                    "The SyncNode that is used with update must have an equivalent"
+                    " identifiable. I.e. you cannot merge entities with differing identifiables."
+                    " The identifiables were:\n"
+                    f"{self.identifiable._create_hashable_string(self.identifiable)}\n"
+                    f"and\n{other.identifiable._create_hashable_string(other.identifiable)}."
+                )
+
+        if other.identifiable:
+            self.identifiable = other.identifiable
+        for attr in ["id", "role", "path", "file", "name", "description"]:
+            if other.__getattribute__(attr) is not None:
+                if self.__getattribute__(attr) is None:
+                    self.__setattr__(attr, other.__getattribute__(attr))
+                else:
+                    if self.__getattribute__(attr) != other.__getattribute__(attr):
+                        raise ImpossibleMergeError(
+                            f"Trying to update {attr} but this would lead to an "
+                            f"override of the value '{self.__getattribute__(attr)}' "
+                            f"by the value '{other.__getattribute__(attr)}'",
+                            pname=attr, values=(self.__getattribute__(attr),
+                                                other.__getattribute__(attr))
+                        )
+        for p in other.parents:
+            if not parent_in_list(p, self.parents):
+                self.parents.append(p)
+        for p in other.properties:
+            self.properties.append(p)
+
+    def export_entity(self) -> db.Entity:
+        """create a db.Entity object from this SyncNode
+
+        Properties are only added once (based on id or name). If their values do not match, an
+        error is raised. If the values are SyncNode objects with IDs, they are considered equal
+        if their IDs are equal.
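+
+        Example (an illustrative sketch):
+
+            node.properties.append(db.Property(name="a", value=5))
+            node.properties.append(db.Property(name="a", value=5))
+            ent = node.export_entity()  # property "a" appears only once
+            # conflicting values (e.g. 5 vs. 6) would raise an ImpossibleMergeError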
+        """
+        ent = None
+        if self.role == "Record":
+            ent = db.Record()
+        elif self.role == "File":
+            ent = db.File()
+        else:
+            raise RuntimeError("Invalid role")
+        for attr in ["id", "role", "path", "file", "name", "description"]:
+            ent.__setattr__(attr, self.__getattribute__(attr))
+        for p in self.parents:
+            ent.add_parent(p)
+        for p in self.properties:
+            entval: Any = ent.get_property(p)
+            if entval is None:
+                ent.add_property(id=p.id, name=p.name, value=p.value, description=p.description,
+                                 datatype=p.datatype, unit=p.unit)
+            else:
+                entval = entval.value
+                unequal = False
+                pval = p.value
+                if isinstance(entval, list) != isinstance(pval, list):
+                    unequal = True
+                if not isinstance(entval, list):
+                    entval = [entval]
+                if not isinstance(pval, list):
+                    pval = [pval]
+                if len(entval) != len(pval):
+                    unequal = True
+                else:
+                    for e_el, p_el in zip(entval, pval):
+                        if isinstance(e_el, SyncNode) and e_el.id is not None:
+                            e_el = e_el.id
+                        if isinstance(p_el, SyncNode) and p_el.id is not None:
+                            p_el = p_el.id
+                        if e_el != p_el:
+                            unequal = True
+
+                if unequal:
+                    logger.error(
+                        "The Crawler is trying to create an entity,"
+                        " but there are conflicting property values.\n"
+                        f"Problematic Property: {p.name}\n"
+                        f"First value:\n{entval}\n"
+                        f"Second value:\n{pval}\n"
+                        f"{self}"
+                    )
+                    ime = ImpossibleMergeError(
+                        "Cannot merge Entities", pname=p.name, values=(entval, pval)
+                    )
+                    raise ime
+        return ent
+
+    def __repr__(self) -> str:
+        """ somewhat concise text representation of the SyncNode """
+        res = f"\n=====================================================\n{self.role}\n"
+        res += yaml.dump(
+            {
+                "id": self.id,
+                "name": self.name,
+                "path": self.path,
+                "parents": [el.name for el in self.parents],
+            },
+            allow_unicode=True,
+        )
+        res += "---------------------------------------------------\n"
+        res += "properties:\n"
+        d: dict[str, Any] = {}
+        for p in self.properties:
+            v = p.value
+            d[p.name] = []
+            if not isinstance(p.value, list):
+                v = [v]
+            for el in v:
+                if isinstance(el, SyncNode):
+                    d[p.name].append(
+                        {
+                            "id": el.id,
+                            "name": el.name,
+                            "path": el.path,
+                            "parents": [e.name for e in el.parents],
+                        }
+                    )
+                else:
+                    d[p.name].append(el)
+
+        return (
+            res
+            + yaml.dump(d, allow_unicode=True)
+            + "=====================================================\n"
+        )
+
+    def _check_for_multiproperties(self):
+        """ warns if multiproperties are present """
+        ids = set()
+        names = set()
+        for p in self.properties:
+            if p.name is not None:
+                if p.name in names:
+                    warn("Multiproperties are not supported by the crawler.")
+                names.add(p.name)
+            if p.id is not None:
+                if p.id in ids:
+                    warn("Multiproperties are not supported by the crawler.")
+                ids.add(p.id)
+
+
+def parent_in_list(parent: Parent, plist: _ParentList) -> bool:
+    """helper function that checks whether a parent matching the given name and ID (each
+    compared only if set) is in the plist"""
+    missing = False
+    if parent.name is not None:
+        if parent.name not in plist._element_by_name:
+            missing = True
+    if parent.id is not None:
+        if str(parent.id) not in plist._element_by_id:
+            missing = True
+    return not missing
+
+
+def property_in_list(prop: db.Property, plist: _Properties) -> bool:
+    """helper function that checks whether a property matching the given name and ID (each
+    compared only if set) is in the plist"""
+    missing = False
+    if prop.name is not None:
+        if prop.name not in plist._element_by_name:
+            missing = True
+    if prop.id is not None:
+        if str(prop.id) not in plist._element_by_id:
+            missing = True
+    return not missing
diff --git a/src/doc/getting_started/furtherreading.rst b/src/doc/getting_started/furtherreading.rst
index eb600416c1fce3857d28fc2e856ceabebb3a8bb7..8d8d3ecc4b5575f71e90e9e5a17b060a63403a07 100644
--- a/src/doc/getting_started/furtherreading.rst
+++ b/src/doc/getting_started/furtherreading.rst
@@ -6,3 +6,4 @@ Further reading
 - Some useful examples can be found in the `integration tests
   <https://gitlab.com/caosdb/caosdb-crawler/-/tree/main/integrationtests>`_ (and to a certain extent
   in the unit tests).
+- TODO: Information on caching
diff --git a/tox.ini b/tox.ini
index e587774323171dda590c4e9198e049c1ed0e0e14..36807a619a9536a02908c36364e02ba52c1a0d69 100644
--- a/tox.ini
+++ b/tox.ini
@@ -16,6 +16,9 @@ commands = caosdb-crawler --help
 [flake8]
 max-line-length = 100
 
+[pycodestyle]
+max-line-length = 100
+
 [pytest]
 testpaths = unittests
-xfail_strict = True
\ No newline at end of file
+xfail_strict = True
diff --git a/unittests/test_crawler.py b/unittests/test_crawler.py
index a48b5e16ad1a71beeb4a5bf1c2ac52f67bbd7afe..4e8b057e382e6353698b8b63bbcc4e648284d711 100644
--- a/unittests/test_crawler.py
+++ b/unittests/test_crawler.py
@@ -39,10 +39,12 @@ import linkahead.common.models as dbmodels
 import pytest
 import yaml
 from caosadvancedtools.models.parser import parse_model_from_string
-from caoscrawler.crawl import (Crawler, SecurityMode, TreatedRecordLookUp,
-                               _treat_deprecated_prefix, crawler_main,
-                               split_restricted_path)
+from caoscrawler.crawl import (Crawler, SecurityMode, _treat_deprecated_prefix,
+                               crawler_main, split_restricted_path)
 from caoscrawler.debug_tree import DebugTree
+from caoscrawler.exceptions import (ImpossibleMergeError,
+                                    MissingIdentifyingProperty,
+                                    MissingReferencingEntityError)
 from caoscrawler.identifiable import Identifiable
 from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter,
                                                IdentifiableAdapter,
@@ -52,6 +54,7 @@ from caoscrawler.scanner import (create_converter_registry, scan_directory,
 from caoscrawler.stores import GeneralStore, RecordStore
 from caoscrawler.structure_elements import (DictElement, DictListElement,
                                             DictTextElement, File)
+from caoscrawler.sync_graph import SyncGraph
 from linkahead.apiutils import compare_entities
 from linkahead.cached import cache_clear
 from linkahead.exceptions import EmptyUniqueQueryError
@@ -87,6 +90,20 @@ NEW_ELEMENT = (db.Record()
                .add_property(name="result", value="homogeneous"))
 
 
+def reset_mocks(mocks):
+    for mock in mocks:
+        mock.reset_mock()
+
+
+def mock_create_values(values, element):
+    pass
+
+
+def mock_get_entity_by_query(query=None):
+    if query is not None:
+        return db.Record(id=1111, name='rec_name').add_parent('RT')
+
+
 def mock_get_entity_by(eid=None, name=None, path=None):
     if eid is not None:
         candidates = [el for el in EXAMPLE_SERVER_STATE if el.id == eid]
@@ -110,6 +127,14 @@ def mock_get_entity_by(eid=None, name=None, path=None):
             raise EmptyUniqueQueryError("")
 
 
+def basic_retrieve_by_name_mock_up(rec, referencing_entities=None, known=None):
+    """ returns a stored Record if rec.name is an existing key, None otherwise """
+    if rec.name in known:
+        return known[rec.name]
+    else:
+        return None
+
+
 def mock_retrieve_record(identifiable: Identifiable):
     """ assumes that the identifiable is always only the date"""
 
@@ -156,8 +181,56 @@ def clear_cache():
     cache_clear()
 
 
+@pytest.fixture
+def crawler_mocked_identifiable_retrieve():
+    crawler = Crawler()
+    # TODO use minimal setup
+    # mock retrieval of registered identifiables: return a Record with just a parent
+    crawler.identifiableAdapter.get_registered_identifiable = Mock(
+        side_effect=lambda x: db.Record().add_parent(x.parents[0].name).add_property(name='name'))
+
+    # Simulate remote server content by using the names to identify records
+    # There is only a single known Record with name A
+    crawler.identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
+        basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A")}))
+    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
+        side_effect=partial(
+            basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A")}))
+    return crawler
+
+
+@pytest.fixture
+def crawler_mocked_for_backref_test():
+    crawler = Crawler()
+    # mock retrieval of registered identifiables: return a Record with just a parent
+
+    def get_reg_ident(x):
+        if x.parents[0].name == "C":
+            return db.Record().add_parent(x.parents[0].name).add_property(
+                "is_referenced_by", value=["BR"]).add_property("name")
+        elif x.parents[0].name == "D":
+            return db.Record().add_parent(x.parents[0].name).add_property(
+                "is_referenced_by", value=["BR", "BR2"]).add_property("name")
+        else:
+            return db.Record().add_parent(x.parents[0].name).add_property("name")
+    crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=get_reg_ident)
+
+    # Simulate remote server content by using the names to identify records
+    # There is only a single known Record with name A
+    crawler.identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
+        basic_retrieve_by_name_mock_up, known={"A":
+                                               db.Record(id=1111, name="A").add_parent("BR")}))
+    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
+        side_effect=partial(
+            basic_retrieve_by_name_mock_up, known={"A":
+                                                   db.Record(id=1111, name="A").add_parent("BR")}))
+    return crawler
+
+
 @pytest.mark.filterwarnings("ignore::DeprecationWarning")
 def test_constructor():
+    # tests that appropriate DeprecationWarnings are triggered by the constructor when deprecated
+    # arguments are being passed.
     with warnings.catch_warnings(record=True) as w:
         # Cause all warnings to always be triggered.
         warnings.filterwarnings("ignore")
@@ -174,6 +247,7 @@ def test_constructor():
 
 @pytest.mark.filterwarnings("ignore::DeprecationWarning")
 def test_deprecated_functions():
+    # tests that appropriate DeprecationWarnings are triggered by deprecated methods
     with warnings.catch_warnings(record=True) as w:
         # Cause all warnings to always be triggered.
         warnings.filterwarnings("ignore")
@@ -218,95 +292,62 @@ def test_check_whether_parent_exists():
 
 def test_remove_unnecessary_updates():
     # test trivial case
-    upl = [db.Record().add_parent("A")]
-    irs = [db.Record().add_parent("A")]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    crawled_data = [db.Record().add_parent("A")]
+    identified_records = [db.Record().add_parent("A")]
+    updates = Crawler.remove_unnecessary_updates(crawled_data, identified_records)
     assert len(updates) == 0
 
     # test property difference case
-    # TODO this should work right?
-    # upl = [db.Record().add_parent("A").add_property("a", 3)]
-    # irs = [db.Record().add_parent("A")]  # ID should be s
-    # Crawler.remove_unnecessary_updates(upl, irs)
-    # assert len(upl) == 1
+    crawled_data = [db.Record().add_parent("A").add_property("a", 3)]
+    identified_records = [db.Record().add_parent("A")]  # ID should be s
+    Crawler.remove_unnecessary_updates(crawled_data, identified_records)
+    assert len(crawled_data) == 1
 
     # test value difference case
-    upl = [db.Record().add_parent("A").add_property("a", 5)]
-    irs = [db.Record().add_parent("A").add_property("a")]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    crawled_data = [db.Record().add_parent("A").add_property("a", 5)]
+    identified_records = [db.Record().add_parent("A").add_property("a")]
+    updates = Crawler.remove_unnecessary_updates(crawled_data, identified_records)
     assert len(updates) == 1
-    upl = [db.Record().add_parent("A").add_property("a", 5)]
-    irs = [db.Record().add_parent("A").add_property("a", 5)]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    crawled_data = [db.Record().add_parent("A").add_property("a", 5)]
+    identified_records = [db.Record().add_parent("A").add_property("a", 5)]
+    updates = Crawler.remove_unnecessary_updates(crawled_data, identified_records)
     assert len(updates) == 0
 
     # test unit difference case
-    upl = [db.Record().add_parent("A").add_property("a", unit='cm')]
-    irs = [db.Record().add_parent("A").add_property("a")]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    crawled_data = [db.Record().add_parent("A").add_property("a", unit='cm')]
+    identified_records = [db.Record().add_parent("A").add_property("a")]
+    updates = Crawler.remove_unnecessary_updates(crawled_data, identified_records)
     assert len(updates) == 1
 
     # test None difference case
-    upl = [db.Record().add_parent("A").add_property("a")]
-    irs = [db.Record().add_parent("A").add_property("a", 5)]
-    updates = Crawler.remove_unnecessary_updates(upl, irs)
+    crawled_data = [db.Record().add_parent("A").add_property("a")]
+    identified_records = [db.Record().add_parent("A").add_property("a", 5)]
+    updates = Crawler.remove_unnecessary_updates(crawled_data, identified_records)
     assert len(updates) == 1
 
 
 def test_split_into_inserts_and_updates_trivial():
     crawler = Crawler()
-    crawler.split_into_inserts_and_updates([])
-
+    st = SyncGraph([], crawler.identifiableAdapter)
+    crawler.split_into_inserts_and_updates(st)
 
-def test_split_into_inserts_and_updates_unidentified():
-    crawler = Crawler()
-    with raises(ValueError) as err:
-        crawler.split_into_inserts_and_updates([db.Record(name="recname").add_parent("someparent")])
-    assert str(err.value).startswith("There is no identifying information.")
 
-
-def basic_retrieve_by_name_mock_up(rec, referencing_entities=None, known=None):
-    """ returns a stored Record if rec.name is an existing key, None otherwise """
-    if rec.name in known:
-        return known[rec.name]
-    else:
-        return None
-
-
-@pytest.fixture
-def crawler_mocked_identifiable_retrieve():
-    crawler = Crawler()
-    # TODO use minimal setup
-    # mock retrieval of registered identifiabls: return Record with just a parent
-    crawler.identifiableAdapter.get_registered_identifiable = Mock(
-        side_effect=lambda x: db.Record().add_parent(x.parents[0].name).add_property(name='name'))
-
-    # Simulate remote server content by using the names to identify records
-    # There is only a single known Record with name A
-    crawler.identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
-        basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A")}))
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
-        side_effect=partial(
-            basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A")}))
-    return crawler
-
-
-def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retrieve):
+def test_split_into_inserts_and_updates_simple(crawler_mocked_identifiable_retrieve):
+    # basic test that checks whether two records are correctly sorted to update and insert based on
+    # whether an entity can be found using the identifiable
     crawler = crawler_mocked_identifiable_retrieve
     identlist = [Identifiable(name="A", record_type="C"), Identifiable(name="B", record_type="C")]
-    entlist = [db.Record(name="A").add_parent(
-        "C"), db.Record(name="B").add_parent("C")]
+    entlist = [db.Record(name="A").add_parent("C"),
+               db.Record(name="B").add_parent("C")]
 
-    assert crawler.treated_records_lookup.get_any(entlist[0], identlist[0]) is None
-    assert crawler.treated_records_lookup.get_any(entlist[0], identlist[0]) is None
-    assert not crawler._has_reference_value_without_id(identlist[0])
-    assert not crawler._has_reference_value_without_id(identlist[1])
+    st = SyncGraph(entlist, crawler.identifiableAdapter)
+    # check setup
     assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
         identlist[0]).id == 1111
     assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
         identlist[1]) is None
 
-    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
+    insert, update = crawler.split_into_inserts_and_updates(st)
     assert len(insert) == 1
     assert insert[0].name == "B"
     assert len(update) == 1
@@ -316,49 +357,20 @@ def test_split_into_inserts_and_updates_single(crawler_mocked_identifiable_retri
     crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
 
 
-def test_split_into_inserts_and_updates_with_duplicate(crawler_mocked_identifiable_retrieve):
+def test_split_into_inserts_and_updates_with_circ(crawler_mocked_identifiable_retrieve):
+    # test trying to split circular dependency
     crawler = crawler_mocked_identifiable_retrieve
-    a = db.Record(name="A").add_parent("C")
-    b = db.Record(name="B").add_parent("C")
-    b.add_property("A", a)
-    # This is identical to a and should be removed
-    c = db.Record(name="A").add_parent("C")
-    entlist = [a, b, c]
-    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
-    assert len(insert) == 1
-    assert insert[0].name == "B"
-    assert len(update) == 1
-    assert update[0].name == "A"
-    # if this ever fails, the mock up may be removed
-    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
-
-
-def test_split_into_inserts_and_updates_with_ref(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve
-    # try it with a reference
-    a = db.Record(name="A").add_parent("C")
-    b = db.Record(name="B").add_parent("C")
-    b.add_property("A", a)
-    entlist = [a, b]
-    insert, update = crawler.split_into_inserts_and_updates(entlist)
-    assert len(insert) == 1
-    assert insert[0].name == "B"
-    assert len(update) == 1
-    assert update[0].name == "A"
-    # if this ever fails, the mock up may be removed
-    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
-
+    crawler.identifiableAdapter.get_registered_identifiable = Mock(
+        side_effect=lambda x: db.Record().add_parent('C').add_property(name='a')
+    )
+    # two records that reference each other via identifying properties
+    a = db.Record().add_parent("C")
+    b = db.Record().add_parent("C").add_property(name='a', value=a)
+    a.add_property(name='a', value=b)
 
-def test_split_into_inserts_and_updates_with_circ():
-    # try circular
-    a = db.Record(name="A").add_parent("C")
-    b = db.Record(name="B").add_parent("C")
-    b.add_property("A", a)
-    a.add_property("B", b)
-    entlist = [a, b]
-    # TODO this does not seem to be complete!
+    st = SyncGraph([a, b], crawler.identifiableAdapter)
+    with pytest.raises(RuntimeError):
+        crawler.split_into_inserts_and_updates(st)
 
 
 def test_split_into_inserts_and_updates_with_complex(crawler_mocked_identifiable_retrieve):
@@ -372,11 +384,12 @@ def test_split_into_inserts_and_updates_with_complex(crawler_mocked_identifiable
     b = db.Record(name="B").add_parent("C")
     g = db.Record(name="G").add_parent("C")
     f = db.Record(name="F").add_parent("C")
-    g.add_property("A", a)
-    b.add_property("A", f)
+    g.add_property("C", b)
     b.add_property("A", a)
+    b.add_property("C", f)
     entlist = [a, b, g]
-    insert, update = crawler.split_into_inserts_and_updates(entlist)
+    st = SyncGraph(entlist, crawler.identifiableAdapter)
+    insert, update = crawler.split_into_inserts_and_updates(st)
     assert len(insert) == 3
     assert "B" in [el.name for el in insert]
     assert len(update) == 1
@@ -388,23 +401,8 @@ def test_split_into_inserts_and_updates_with_complex(crawler_mocked_identifiable
     # TODO write test where the unresoled entity is not part of the identifiable
 
 
-def test_split_into_inserts_and_updates_with_copy_attr(crawler_mocked_identifiable_retrieve):
-    crawler = crawler_mocked_identifiable_retrieve
-    # assume identifiable is only the name
-    a = db.Record(name="A").add_parent("C")
-    a.add_property("foo", 1)
-    b = db.Record(name="A").add_parent("C")
-    b.add_property("bar", 2)
-    entlist = [a, b]
-    insert, update = crawler.split_into_inserts_and_updates(entlist)
-
-    assert update[0].get_property("bar").value == 2
-    assert update[0].get_property("foo").value == 1
-    # if this ever fails, the mock up may be removed
-    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable.assert_called()
-
-
+@patch("caoscrawler.crawl.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
 @patch("caoscrawler.identifiable_adapters.cached_query",
        new=Mock(side_effect=mock_cached_only_rt))
 def test_split_iiau_with_unmergeable_list_items():
@@ -440,6 +438,12 @@ b1: ("same", c1)
 b2: ("same", c2)
 
 a: ([b1, b2])
+
+- a can be identified.
+- The bs can be identified with each other once a is identified.
+- The cs depend on the bs, but cannot be merged into one entity because they have conflicting
+  properties.
     """
     prop_ident = db.Property("prop_ident", datatype=db.INTEGER)
     prop_other = db.Property("prop_ident", datatype=db.INTEGER)
@@ -472,82 +476,108 @@ a: ([b1, b2])
 
     crawler = Crawler(identifiableAdapter=ident_adapter)
 
-    with raises(RuntimeError) as rte:
-        crawler.synchronize(commit_changes=False,
-                            crawled_data=[rec_a, *rec_b, *rec_c])
-    assert not isinstance(rte.value, NotImplementedError), \
-        "Exception must not be NotImplementedError, but plain RuntimeError."
-    assert "Could not find referencing entities" in rte.value.args[0]
-    assert "merge conflicts in the referencing" in rte.value.args[0]
+    st = SyncGraph(deepcopy([rec_a, *rec_b, *rec_c]), crawler.identifiableAdapter)
+    assert st._identity_relies_on_unchecked_entity(st.nodes[0]) is False
+    assert st._identity_relies_on_unchecked_entity(st.nodes[1])
+    assert st._identity_relies_on_unchecked_entity(st.nodes[2])
+    assert st._identity_relies_on_unchecked_entity(st.nodes[3])
+    assert st._identity_relies_on_unchecked_entity(st.nodes[4])
+    assert len(st.unchecked) == 5
+
+    # The Cs cannot be merged due to different identifying properties
+    # The Bs cannot be merged due to different references to Cs
+    with raises(ImpossibleMergeError) as rte:
+        crawler.split_into_inserts_and_updates(st)
+    # TODO
+    # assert not isinstance(rte.value, NotImplementedError), \
+    #     "Exception must not be NotImplementedError, but plain RuntimeError."
+    # assert "Could not find referencing entities" in rte.value.args[0]
+    # assert "merge conflicts in the referencing" in rte.value.args[0]
+
+
+@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+       new=Mock(side_effect=lambda x: [x]))
+def test_split_into_inserts_and_updates_backref(crawler_mocked_for_backref_test):
+    # test that backrefs are appropriately considered in the identifiable
+    crawler = crawler_mocked_for_backref_test
+    identlist = [Identifiable(name="A", record_type="BR"),
+                 Identifiable(name="B", record_type="C", backrefs=[db.Entity()])]
+    referenced = db.Record(name="B").add_parent("C")
+    entlist = [referenced, db.Record(name="A").add_parent("BR").add_property("ref", referenced), ]
 
+    # Test without referencing object
+    # currently a MissingReferencingEntityError is raised if the referencing entity is missing.
+    with raises(MissingReferencingEntityError):
+        st = SyncGraph([db.Record(name="B").add_parent("C")], crawler.identifiableAdapter)
 
-def test_has_missing_object_in_references():
-    crawler = Crawler()
-    # Simulate remote server content by using the names to identify records
-    # There are only two known Records with name A and B
-    crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=partial(
-        basic_retrieve_by_name_mock_up, known={"C": db.Record(name="C").add_parent("RTC")
-                                               .add_property("d").add_property("name"),
-                                               "D": db.Record(name="D").add_parent("RTD")
-                                               .add_property("d").add_property("e").add_property("name"),
-                                               }))
-
-    # one reference with id -> check
-    assert not crawler._has_missing_object_in_references(
-        Identifiable(name="C", record_type="RTC", properties={'d': 123}), {})
-    # one ref with Entity with id -> check
-    rec = db.Record(id=123).add_parent("C")
-    assert not crawler._has_missing_object_in_references(
-        Identifiable(name="C", record_type="RTC", properties={'d': rec}), {id(rec): {'C': [None]}})
-    # one ref with id one with Entity with id (mixed) -> check
-    rec = db.Record(id=123).add_parent("RTC")
-    assert not crawler._has_missing_object_in_references(
-        Identifiable(name="C", record_type="RTD",
-                     properties={'d': 123, 'b': rec}), {id(rec): {'C': [None]}})
-    # entity to be referenced in the following
-    a = db.Record(name="C").add_parent("C").add_property("d", 12311)
-    # one ref with id one with Entity without id (but not identifying) -> fail
-    assert not crawler._has_missing_object_in_references(
-        Identifiable(name="C", record_type="RTC", properties={'d': 123, 'e': a}),
-        {id(a): {'C': [None]}})
-
-    # one ref with id one with Entity without id (mixed) -> fail
-    assert not crawler._has_missing_object_in_references(
-        Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}),
-        {id(a): {'C': [None]}})
-
-    crawler.treated_records_lookup.add(a, Identifiable(name="C", record_type="RTC",
-                                                       properties={'d': 12311}))
-    # one ref with id one with Entity without id but in cache -> check
-    assert crawler._has_missing_object_in_references(
-        Identifiable(name="D", record_type="RTD", properties={'d': 123, 'e': a}),
-        {id(a): {'C': [None]}})
+    # identifiables were not yet checked
+    st = SyncGraph(entlist, crawler.identifiableAdapter)
+    assert st.get_equivalent(st.nodes[1]) is None
+    assert st.get_equivalent(st.nodes[0]) is None
+    # one can be found remotely, one not
+    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
+        identlist[0]).id == 1111
+    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
+        identlist[1]) is None
 
-    # if this ever fails, the mock up may be removed
-    crawler.identifiableAdapter.get_registered_identifiable.assert_called()
+    # check the split...
+    insert, update = crawler.split_into_inserts_and_updates(st)
+    # A was found remotely and is therefore in the update list
+    assert len(update) == 1
+    assert update[0].name == "A"
+    # B does not exist on the (simulated) remote server
+    assert len(insert) == 1
+    assert insert[0].name == "B"
 
 
-@ pytest.mark.xfail()
-def test_references_entities_without_ids():
-    crawler = Crawler()
-    assert not crawler._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                       .add_property('last_name', 123)
-                                                       .add_property('first_name', 123))
-    # id and rec with id
-    assert not crawler._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                       .add_property('first_name', 123)
-                                                       .add_property('last_name',
-                                                                     db.Record(id=123)))
-    # id and rec with id and one unneeded prop
-    assert crawler._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                   .add_property('first_name', 123)
-                                                   .add_property('stuff', db.Record())
-                                                   .add_property('last_name', db.Record(id=123)))
-
-    # one identifying prop is missing
-    assert crawler._has_reference_value_without_id(db.Record().add_parent("Person")
-                                                   .add_property('first_name', 123)
-                                                   .add_property('last_name', db.Record()))
+@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+       new=Mock(side_effect=lambda x: [x]))
+def test_split_into_inserts_and_updates_mult_backref(crawler_mocked_for_backref_test):
+    # test whether multiple references of the same record type are correctly used
+    crawler = crawler_mocked_for_backref_test
+    referenced = db.Record(name="B").add_parent("C")
+    entlist = [referenced,
+               db.Record(id=1, name="A").add_parent("BR").add_property("ref", referenced),
+               db.Record(id=2, name="C").add_parent("BR").add_property("ref", referenced),
+               ]
+
+    # test whether both entities are listed in the backref attribute of the identifiable
+    st = SyncGraph(entlist, crawler.identifiableAdapter)
+
+    identifiable = crawler.identifiableAdapter.get_identifiable(
+        st.nodes[0],
+        st.backward_references_backref[id(st.nodes[0])])
+    assert len(identifiable.backrefs) == 2
+
+    # check the split...
+    insert, update = crawler.split_into_inserts_and_updates(st)
+    assert len(update) == 2
+    assert len(insert) == 1
+
+
+@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+       new=Mock(side_effect=lambda x: [x]))
+def test_split_into_inserts_and_updates_diff_backref(crawler_mocked_for_backref_test):
+    # test whether multiple references of different record types are correctly used
+    crawler = crawler_mocked_for_backref_test
+    referenced = db.Record(name="B").add_parent("D")
+    entlist = [referenced,
+               db.Record(id=1, name="A").add_parent("BR").add_property("ref", referenced),
+               db.Record(id=2, name="A").add_parent("BR2").add_property("ref", referenced),
+               ]
+
+    # test whether both entities are listed in the backref attribute of the identifiable
+    st = SyncGraph(entlist, crawler.identifiableAdapter)
+    identifiable = crawler.identifiableAdapter.get_identifiable(
+        st.nodes[0],
+        st.backward_references_backref[id(st.nodes[0])])
+
+    assert len(identifiable.backrefs) == 2
+
+    # check the split...
+    insert, update = crawler.split_into_inserts_and_updates(st)
+    assert len(update) == 2
+    assert len(insert) == 1
 
 
 def test_replace_entities_with_ids():
@@ -562,20 +592,15 @@ def test_replace_entities_with_ids():
     assert a.get_property("C").value == [12345, 233324]
 
 
-def reset_mocks(mocks):
-    for mock in mocks:
-        mock.reset_mock()
-
-
-@ patch("caoscrawler.crawl.cached_get_entity_by",
-        new=Mock(side_effect=mock_get_entity_by))
-@ patch("caoscrawler.identifiable_adapters.cached_get_entity_by",
-        new=Mock(side_effect=mock_get_entity_by))
-@ patch("caoscrawler.identifiable_adapters.CaosDBIdentifiableAdapter."
-        "retrieve_identified_record_for_identifiable",
-        new=Mock(side_effect=mock_retrieve_record))
-@ patch("caoscrawler.crawl.db.Container.insert")
-@ patch("caoscrawler.crawl.db.Container.update")
+@patch("caoscrawler.crawl.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
+@patch("caoscrawler.identifiable_adapters.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
+@patch("caoscrawler.identifiable_adapters.CaosDBIdentifiableAdapter."
+       "retrieve_identified_record_for_identifiable",
+       new=Mock(side_effect=mock_retrieve_record))
+@patch("caoscrawler.crawl.db.Container.insert")
+@patch("caoscrawler.crawl.db.Container.update")
 def test_synchronization_no_commit(upmock, insmock):
     crawled_data = [r.copy() for r in EXAMPLE_SERVER_STATE if r.role == "Record"]
     # change  one; add one
@@ -592,20 +617,19 @@ def test_synchronization_no_commit(upmock, insmock):
     assert len(ups) == 1
 
 
-@ patch("caoscrawler.crawl.cached_get_entity_by",
-        new=Mock(side_effect=mock_get_entity_by))
-@ patch("caoscrawler.identifiable_adapters.cached_get_entity_by",
-        new=Mock(side_effect=mock_get_entity_by))
-@ patch("caoscrawler.identifiable_adapters.CaosDBIdentifiableAdapter."
-        "retrieve_identified_record_for_identifiable",
-        new=Mock(side_effect=mock_retrieve_record))
-@ patch("caoscrawler.crawl.db.Container.insert")
-@ patch("caoscrawler.crawl.db.Container.update")
-@ patch("caoscrawler.crawl.UpdateCache.insert")
+@patch("caoscrawler.crawl.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
+@patch("caoscrawler.identifiable_adapters.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
+@patch("caoscrawler.identifiable_adapters.CaosDBIdentifiableAdapter."
+       "retrieve_identified_record_for_identifiable",
+       new=Mock(side_effect=mock_retrieve_record))
+@patch("caoscrawler.crawl.db.Container.insert")
+@patch("caoscrawler.crawl.db.Container.update")
+@patch("caoscrawler.crawl.UpdateCache.insert")
 def test_security_mode(updateCacheMock, upmock, insmock):
     # trivial case: nothing to do
     crawled_data = [r.copy() for r in EXAMPLE_SERVER_STATE if r.role == "Record"]
-    print(crawled_data)
     crawler = Crawler(securityMode=SecurityMode.RETRIEVE)
     crawler.synchronize(commit_changes=True, crawled_data=crawled_data)
     assert crawler.run_id is not None
@@ -640,9 +664,6 @@ def test_security_mode(updateCacheMock, upmock, insmock):
     assert crawler.run_id is not None
     insmock.assert_not_called()
     upmock.assert_not_called()
-    # import IPython
-    # IPython.embed()
-    # print(updateCacheMock.call_args_list)
     assert updateCacheMock.call_count == 1
     # reset counts
     reset_mocks([updateCacheMock, insmock, upmock])
@@ -698,65 +719,6 @@ def test_security_mode(updateCacheMock, upmock, insmock):
     crawled_data[-1] = EXAMPLE_SERVER_STATE[-1].copy()
 
 
-def test_create_reference_mapping():
-    a = db.Record().add_parent("A")
-    b = db.Record(id=132).add_parent("B").add_property('a', a)
-    ref = Crawler.create_reference_mapping([a, b])
-    assert id(a) in ref
-    assert id(b) in ref
-    assert "B" in ref[id(a)]
-    assert {} == ref[id(b)]
-    assert ref[id(a)]["B"] == [132]
-
-
-def test_create_flat_list():
-    a = db.Record()
-    b = db.Record()
-    a.add_property(name="a", value=a)
-    a.add_property(name="b", value=b)
-    flat = Crawler.create_flat_list([a])
-    assert len(flat) == 2
-    assert a in flat
-    assert b in flat
-    c = db.Record()
-    c.add_property(name="a", value=a)
-    # This would caus recursion if it is not dealt with properly.
-    a.add_property(name="c", value=c)
-    flat = Crawler.create_flat_list([c])
-    assert len(flat) == 3
-    assert a in flat
-    assert b in flat
-    assert c in flat
-
-
-@ pytest.fixture
-def crawler_mocked_for_backref_test():
-    crawler = Crawler()
-    # mock retrieval of registered identifiabls: return Record with just a parent
-
-    def get_reg_ident(x):
-        if x.parents[0].name == "C":
-            return db.Record().add_parent(x.parents[0].name).add_property(
-                "is_referenced_by", value=["BR"]).add_property("name")
-        elif x.parents[0].name == "D":
-            return db.Record().add_parent(x.parents[0].name).add_property(
-                "is_referenced_by", value=["BR", "BR2"]).add_property("name")
-        else:
-            return db.Record().add_parent(x.parents[0].name).add_property("name")
-    crawler.identifiableAdapter.get_registered_identifiable = Mock(side_effect=get_reg_ident)
-
-    # Simulate remote server content by using the names to identify records
-    # There is only a single known Record with name A
-    crawler.identifiableAdapter.retrieve_identified_record_for_record = Mock(side_effect=partial(
-        basic_retrieve_by_name_mock_up, known={"A":
-                                               db.Record(id=1111, name="A").add_parent("BR")}))
-    crawler.identifiableAdapter.retrieve_identified_record_for_identifiable = Mock(
-        side_effect=partial(
-            basic_retrieve_by_name_mock_up, known={"A":
-                                                   db.Record(id=1111, name="A").add_parent("BR")}))
-    return crawler
-
-
 def test_validation_error_print(caplog):
     caplog.set_level(logging.DEBUG, logger="caoscrawler.converters")
     # there should be no server interaction since we only test the behavior if a validation error
@@ -773,96 +735,7 @@ def test_validation_error_print(caplog):
         caplog.clear()
 
 
-@ patch("caoscrawler.identifiable_adapters.get_children_of_rt",
-        new=Mock(side_effect=lambda x: [x]))
-def test_split_into_inserts_and_updates_backref(crawler_mocked_for_backref_test):
-    crawler = crawler_mocked_for_backref_test
-    identlist = [Identifiable(name="A", record_type="BR"),
-                 Identifiable(name="B", record_type="C", backrefs=[db.Entity()])]
-    referenced = db.Record(name="B").add_parent("C")
-    entlist = [referenced, db.Record(name="A").add_parent("BR").add_property("ref", referenced), ]
-
-    # Test without referencing object
-    # currently a RuntimeError is raised if necessary properties are missing.
-    with raises(RuntimeError):
-        crawler.split_into_inserts_and_updates([db.Record(name="B").add_parent("C")])
-
-    # identifiables were not yet checked
-    assert crawler.treated_records_lookup.get_any(entlist[1], identlist[0]) is None
-    assert crawler.treated_records_lookup.get_any(entlist[0], identlist[1]) is None
-    # one with reference, one without
-    assert not crawler._has_reference_value_without_id(identlist[0])
-    assert crawler._has_reference_value_without_id(identlist[1])
-    # one can be found remotely, one not
-    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
-        identlist[0]).id == 1111
-    assert crawler.identifiableAdapter.retrieve_identified_record_for_record(
-        identlist[1]) is None
-
-    # check the split...
-    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
-    # A was found remotely and is therefore in the update list
-    assert len(update) == 1
-    assert update[0].name == "A"
-    # B does not exist on the (simulated) remote server
-    assert len(insert) == 1
-    assert insert[0].name == "B"
-
-
-@ patch("caoscrawler.identifiable_adapters.get_children_of_rt",
-        new=Mock(side_effect=lambda x: [x]))
-def test_split_into_inserts_and_updates_mult_backref(crawler_mocked_for_backref_test):
-    # test whether multiple references of the same record type are correctly used
-    crawler = crawler_mocked_for_backref_test
-    referenced = db.Record(name="B").add_parent("C")
-    entlist = [referenced,
-               db.Record(name="A").add_parent("BR").add_property("ref", referenced),
-               db.Record(name="C").add_parent("BR").add_property("ref", referenced),
-               ]
-
-    # test whether both entities are listed in the backref attribute of the identifiable
-    referencing_entities = crawler.create_reference_mapping(entlist)
-    identifiable = crawler.identifiableAdapter.get_identifiable(
-        referenced,
-        referencing_entities[id(referenced)])
-    assert len(identifiable.backrefs) == 2
-
-    # check the split...
-    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
-    assert len(update) == 1
-    assert len(insert) == 2
-
-
-@ patch("caoscrawler.identifiable_adapters.get_children_of_rt",
-        new=Mock(side_effect=lambda x: [x]))
-def test_split_into_inserts_and_updates_diff_backref(crawler_mocked_for_backref_test):
-    # test whether multiple references of the different record types are correctly used
-    crawler = crawler_mocked_for_backref_test
-    referenced = db.Record(name="B").add_parent("D")
-    entlist = [referenced,
-               db.Record(name="A").add_parent("BR").add_property("ref", referenced),
-               db.Record(name="A").add_parent("BR2").add_property("ref", referenced),
-               ]
-
-    # test whether both entities are listed in the backref attribute of the identifiable
-    referencing_entities = crawler.create_reference_mapping(entlist)
-    identifiable = crawler.identifiableAdapter.get_identifiable(
-        referenced,
-        referencing_entities[id(referenced)])
-
-    assert len(identifiable.backrefs) == 2
-
-    # check the split...
-    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
-    assert len(update) == 2
-    assert len(insert) == 1
-
-
-def mock_create_values(values, element):
-    pass
-
-
-@ patch("caoscrawler.converters.IntegerElementConverter.create_values")
+@patch("caoscrawler.converters.IntegerElementConverter.create_values")
 def test_restricted_path(create_mock):
     """
     The restricted_path argument allows to ignroe part of the crawled data structure. Here, we make
@@ -955,7 +828,7 @@ def test_split_restricted_path():
 
 # Filter the warning because we want to have it here and this way it does not hinder running
 # tests with -Werror.
-@ pytest.mark.filterwarnings("ignore:The prefix:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:The prefix:DeprecationWarning")
 def test_deprecated_prefix_option():
     """Test that calling the crawler's main function with the deprecated
     `prefix` option raises the correct errors and warnings.
@@ -993,36 +866,8 @@ def test_create_entity_summary():
     assert "<a href='/Entity/4'>a</a>, <a href='/Entity/6'>b</a>" in text
 
 
-def test_detect_circular_dependency(crawler_mocked_identifiable_retrieve, caplog):
-    crawler = crawler_mocked_identifiable_retrieve
-    crawler.identifiableAdapter.get_registered_identifiable = Mock(
-        side_effect=lambda x: db.Record().add_parent('C').add_property(name='C'))
-    a = db.Record(name='a').add_parent("C")
-    b = db.Record(name='b').add_parent("C").add_property(name="C", value=a)
-    c = db.Record(name='c').add_parent("C").add_property(name='D', value='e'
-                                                         ).add_property(name="C", value=b)
-    d = db.Record(name='c').add_parent("C")
-    a.add_property(name="C", value=c)
-    flat = [a, b, c]
-    circle = Crawler.detect_circular_dependency(flat)
-    assert [id(el) for el in circle] == [id(el) for el in [a, c, b, a]]
-
-    assert Crawler.detect_circular_dependency([d]) is None
-    with raises(RuntimeError):
-        _, _ = crawler.split_into_inserts_and_updates(flat)
-    caplog.set_level(logging.ERROR, logger="caoscrawler.converters")
-    assert "Found circular dependency" in caplog.text
-    assert "\n--------\n\n> Parent: C\n\n>> Name: a\n[\'C\']" in caplog.text
-    caplog.clear()
-
-
-def mock_get_entity_by_query(query=None):
-    if query is not None:
-        return db.Record(id=1111, name='rec_name').add_parent('RT')
-
-
-@ patch("caoscrawler.crawl.cached_get_entity_by",
-        new=Mock(side_effect=mock_get_entity_by_query))
+@patch("caoscrawler.crawl.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by_query))
 def test_replace_name_with_referenced_entity():
     test_text = 'lkajsdf'
     test_int = 134343
@@ -1090,72 +935,3 @@ def test_replace_name_with_referenced_entity():
     assert isinstance(prop.value[2], int)
     assert prop.value[2] == test_id
     assert caoscrawler.crawl.cached_get_entity_by.call_count == 3
-
-
-def test_treated_record_lookup():
-    trlu = TreatedRecordLookUp()
-    exist = db.Record(id=1)
-    trlu.add(exist)
-    assert len(trlu._existing) == 1
-    # was added to existing
-    assert trlu._existing[id(exist)] is exist
-    # is in ID lookup
-    assert trlu._id_look_up[exist.id] is exist
-    # can be accessed via get_existing
-    assert trlu.get_existing(db.Record(id=1)) is exist
-
-    miss = db.Record()
-    # exception when identifiable is missing
-    with raises(RuntimeError):
-        trlu.add(miss)
-    ident = Identifiable(name='a')
-    trlu.add(miss, ident)
-    # was added to missing
-    assert trlu._missing[id(miss)] is miss
-    # is in ident lookup
-    assert trlu._identifiable_look_up[ident.get_representation()] is miss
-    # can be accessed via get_missing
-    assert trlu.get_missing(db.Record(), Identifiable(name='a')) is miss
-
-    fi = db.File(path='a', id=2)
-    trlu.add(fi)
-    assert len(trlu._existing) == 2
-    # was added to existing
-    assert trlu._existing[id(fi)] is fi
-    # is in ID lookup
-    assert trlu._id_look_up[fi.id] is fi
-    # is in path lookup
-    assert trlu._path_look_up[fi.path] is fi
-    # can be accessed via get_existing
-    assert trlu.get_existing(fi) is fi
-
-    all_exi = trlu.get_existing_list()
-    assert fi in all_exi
-    assert exist in all_exi
-    all_mi = trlu.get_missing_list()
-    assert miss in all_mi
-
-    # If a Record was added using the ID, the ID must be used to identify it even though later an
-    # identifiable may be passed as well
-    assert trlu.get_any(exist, Identifiable(name='b')) is exist
-
-    fi2 = db.File(path='b')
-    trlu.add(fi2)
-    assert trlu.get_any(db.File(path='b'), Identifiable(name='c')) is fi2
-
-
-def test_merge_entity_with_identifying_reference(crawler_mocked_identifiable_retrieve):
-    # When one python object representing a record is merged into another python object
-    # representing the same record, the former object can be forgotten and references from it to
-    # other records must not play a role
-    crawler = crawler_mocked_identifiable_retrieve
-    crawler.identifiableAdapter.get_registered_identifiable = Mock(
-        side_effect=lambda x: db.Record().add_parent('C').add_property(name='name') if
-        x.parents[0].name == "C" else
-        db.Record().add_parent('D').add_property(name='is_referenced_by', value="*")
-    )
-    a = db.Record(name='a').add_parent("D")
-    b = db.Record(name='b').add_parent("C")
-    c = db.Record(name='b').add_parent("C").add_property(name="C", value=a)
-    flat = [a, c, b]
-    _, _ = crawler.split_into_inserts_and_updates(flat)
diff --git a/unittests/test_file_identifiables.py b/unittests/test_file_identifiables.py
deleted file mode 100644
index 4ec02aa3fc497f8dc35adc709533ef5b35066f3a..0000000000000000000000000000000000000000
--- a/unittests/test_file_identifiables.py
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/bin/python
-# Tests for file identifiables
-# A. Schlemmer, 06/2021
-
-from unittest.mock import Mock, patch
-
-import caosdb as db
-import pytest
-from caoscrawler.identifiable import Identifiable
-from caoscrawler.identifiable_adapters import LocalStorageIdentifiableAdapter
-from caosdb.cached import cache_clear
-from caosdb.exceptions import EmptyUniqueQueryError
-from pytest import raises
-
-from test_crawler import mock_get_entity_by
-
-
-@pytest.fixture(autouse=True)
-def clear_cache():
-    cache_clear()
-
-
-@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
-       new=Mock(side_effect=id))
-@patch("caoscrawler.identifiable_adapters.cached_get_entity_by",
-       new=Mock(side_effect=mock_get_entity_by))
-def test_file_identifiable():
-    ident = LocalStorageIdentifiableAdapter()
-
-    # Without a path there is no identifying information
-    with raises(ValueError):
-        ident.get_identifiable(db.File(), [])
-
-    fp = "/test/bla/bla.txt"
-    file_obj = db.File(path=fp)
-    identifiable = ident.get_identifiable(file_obj)
-
-    # the path is copied to the identifiable
-    assert fp == identifiable.path
-    assert isinstance(identifiable, Identifiable)
-
-    # __eq__ function is only defined for Identifiable objects
-    with raises(ValueError):
-        file_obj != identifiable
-
-    # since the path does not exist in the data in ident, the follwoing functions return None
-    with raises(EmptyUniqueQueryError):
-        ident.retrieve_identified_record_for_record(file_obj)
-    assert ident.get_file(identifiable) is None
-
-    # Try again with actual files in the store:
-    records = ident.get_records()
-    test_record_wrong_path = db.File(path="/bla/bla/test.txt")
-    test_record_correct_path = db.File(path="/test/bla/bla.txt")
-    test_record_alsocorrect_path = db.File(path="/test/bla/bla.txt")
-    records.append(test_record_wrong_path)
-    # Now, there is a file, but still wrong path -> result is still None
-    identified_file = ident.get_file(file_obj)
-    assert identified_file is None
-
-    records.append(test_record_correct_path)
-    # now there is a match
-    identified_file = ident.get_file(file_obj)
-    assert identified_file is not None
-    assert identified_file.path == file_obj.path
-
-    with raises(RuntimeError, match=".*unambigiously.*"):
-        records.append(test_record_alsocorrect_path)
-        identified_file = ident.get_file(file_obj)
diff --git a/unittests/test_identifiable.py b/unittests/test_identifiable.py
index 28bdb7a2ad75d5b9389b47ca3f0ec2b2e2a1404b..074c3843e351b20d17813a661974fdc59ca0442a 100644
--- a/unittests/test_identifiable.py
+++ b/unittests/test_identifiable.py
@@ -27,6 +27,7 @@ test identifiable module
 import caosdb as db
 import pytest
 from caoscrawler.identifiable import Identifiable
+from caoscrawler.sync_node import SyncNode
 
 
 def test_create_hashable_string():
@@ -42,25 +43,20 @@ def test_create_hashable_string():
     assert (
         Identifiable._create_hashable_string(
             Identifiable(name="A", record_type="B",
-                         properties={'a': db.Record(id=12)})
+                         properties={'a': SyncNode(db.Record(id=12))})
         ) == "P<B>N<A>R<[]>a:12")
     a = Identifiable._create_hashable_string(
-        Identifiable(name="A", record_type="B", properties={'a': [db.Record(id=12)]}))
+        Identifiable(name="A", record_type="B", properties={'a': [SyncNode(db.Record(id=12))]}))
     assert (a == "P<B>N<A>R<[]>a:[12]")
     assert (Identifiable._create_hashable_string(
         Identifiable(name="A", record_type="B", properties={'a': [12]})) == "P<B>N<A>R<[]>a:[12]")
     assert (
         Identifiable._create_hashable_string(
             Identifiable(name="A", record_type="B", properties={
-                         'a': [db.Record(id=12), 11]})
+                         'a': [SyncNode(db.Record(id=12)), 11]})
         ) == "P<B>N<A>R<[]>a:[12, 11]")
-    assert (
-        Identifiable._create_hashable_string(
-            Identifiable(record_type="B", properties={'a': [db.Record()]})
-        ) != Identifiable._create_hashable_string(
-            Identifiable(record_type="B", properties={'a': [db.Record()]})))
     assert Identifiable._create_hashable_string(
-        Identifiable(name="A", record_type="B", backrefs=[123, db.Entity(id=124)],
+        Identifiable(name="A", record_type="B", backrefs=[123, SyncNode(db.Record(id=124))],
                      properties={'a': 5})) == "P<B>N<A>R<['123', '124']>a:5"
 
 
@@ -73,9 +69,9 @@ def test_repr():
     # only test that something meaningful is returned
     assert 'properties' in str(Identifiable(name="A", record_type="B"))
     assert str(Identifiable(name="A", record_type="B", properties={'a': 0})).split(
-        "properties:\n")[1].split('\n')[0] == '{"a": 0}'
+        "properties:\n")[1].split('\n')[0] == '{"a": "0"}'
     assert str(Identifiable(name="A", record_type="B", properties={'a': 0, 'b': "test"})).split(
-        "properties:\n")[1].split('\n')[0] == '{"a": 0, "b": "test"}'
+        "properties:\n")[1].split('\n')[0] == '{"a": "0", "b": "test"}'
 
     # TODO(henrik): Add a test using backrefs once that's implemented.
 
@@ -87,13 +83,5 @@ def test_equality():
         record_id=12, properties={"a": 0}) != Identifiable(record_id=13, properties={"a": 0})
     assert Identifiable(
         record_id=12, properties={"a": 0}) == Identifiable(properties={"a": 0})
-    assert Identifiable(
-        path="a", properties={"a": 0}) != Identifiable(path="b", properties={"a": 0})
-    assert Identifiable(
-        path="a", properties={"a": 0}) == Identifiable(path="a", properties={"a": 1})
-    assert Identifiable(
-        path="a", properties={"a": 0}) == Identifiable(properties={"a": 0})
-    assert Identifiable(properties={"a": 0}) == Identifiable(
-        properties={"a": 0})
-    assert Identifiable(properties={"a": 0}) != Identifiable(
-        properties={"a": 1})
+    assert Identifiable(properties={"a": 0}) == Identifiable(properties={"a": 0})
+    assert Identifiable(properties={"a": 0}) != Identifiable(properties={"a": 1})
diff --git a/unittests/test_identifiable_adapters.py b/unittests/test_identifiable_adapters.py
index ee0e0d6cd7c791f78e7cd2307dc6f34698326b4a..bdcfeacb6dea514ad689156bf2f61e712c665a4e 100644
--- a/unittests/test_identifiable_adapters.py
+++ b/unittests/test_identifiable_adapters.py
@@ -29,6 +29,7 @@ test identifiable_adapters module
 
 import os
 from datetime import datetime
 from pathlib import Path
+from unittest.mock import MagicMock, Mock, patch
 
 import caosdb as db
@@ -37,6 +38,7 @@ from caoscrawler.identifiable import Identifiable
 from caoscrawler.identifiable_adapters import (CaosDBIdentifiableAdapter,
                                                IdentifiableAdapter,
                                                convert_value)
+from caoscrawler.sync_graph import SyncNode
 
 UNITTESTDIR = Path(__file__).parent
 
@@ -122,28 +124,25 @@ def test_load_from_yaml_file():
 
 def test_non_default_name():
     ident = CaosDBIdentifiableAdapter()
-    ident.register_identifiable(
-        "Person", db.RecordType()
-        .add_parent(name="Person")
-        .add_property(name="last_name"))
-    identifiable = ident.get_identifiable(db.Record(name="don't touch it")
+    identifiable = ident.get_identifiable(SyncNode(db.Record(name="don't touch it")
                                           .add_parent("Person")
-                                          .add_property(name="last_name", value='Tom')
-                                          )
+                                          .add_property(name="last_name", value='Tom'), db.RecordType()
+                                          .add_parent(name="Person")
+                                          .add_property(name="last_name")), [])
     assert identifiable.name is None
 
 
 def test_wildcard_ref():
     ident = CaosDBIdentifiableAdapter()
-    ident.register_identifiable(
-        "Person", db.RecordType()
-        .add_parent(name="Person")
-        .add_property(name="is_referenced_by", value=["*"]))
     rec = (db.Record(name="don't touch it").add_parent("Person")
            .add_property(name="last_name", value='Tom'))
-    identifiable = ident.get_identifiable(rec,
-                                          referencing_entities={
-                                              'A': [1]}
+    dummy = SyncNode(db.Record(), None)
+    dummy.id = 1
+    identifiable = ident.get_identifiable(SyncNode(rec, db.RecordType()
+                                                   .add_parent(name="Person")
+                                                   .add_property(name="is_referenced_by", value=["*"])),
+                                          [dummy]
                                           )
     assert identifiable.backrefs[0] == 1
 
@@ -158,25 +157,63 @@ def test_convert_value():
 
 
 def test_get_identifiable():
-    # TODO modify this such that it becomes a test that acutally tests (sufficiently) the
-    # get_identifable function
-
     ident = CaosDBIdentifiableAdapter()
     ident.load_from_yaml_definition(UNITTESTDIR / "example_identifiables.yml")
-    r_cur = (db.Record(id=5)
-             .add_parent(name="Experiment", id=3)
-             .add_property(name="date", value="2022-02-01")
-             .add_property(name="result", value="FAIL"))
-    id_r0 = ident.get_identifiable(r_cur)
-    assert r_cur.parents[0].name == id_r0.record_type
-    assert r_cur.get_property(
-        "date").value == id_r0.properties["date"]
-    assert len(r_cur.parents) == 1
-    assert len(r_cur.properties) == 2
+    rec = (db.Record(id=5)
+           .add_parent(name="Experiment", id=3)
+           .add_property(name="date", value="2022-02-01")
+           .add_property(name="result", value="FAIL"))
+    se = SyncNode(rec,
+                  ident.get_registered_identifiable(rec))
+    id_r0 = ident.get_identifiable(se, [])
+    assert rec.parents[0].name == id_r0.record_type
+    assert rec.get_property("date").value == id_r0.properties["date"]
+    assert len(rec.parents) == 1
+    assert len(rec.properties) == 2
     assert len(id_r0.properties) == 1
 
+    ident = CaosDBIdentifiableAdapter()
+    ident_a = db.RecordType(name="A").add_parent("A").add_property("name").add_property("a")
+    ident.register_identifiable("A", ident_a)
+    rec = (db.Record(id=5)
+           .add_parent(name="A", id=3)
+           .add_property(name="a", value="2022-02-01")
+           .add_property(name="result", value="FAIL"))
+    se = SyncNode(rec, ident.get_registered_identifiable(rec))
+    for el in [
+        db.Record()
+        .add_parent(name="A", id=3)
+        .add_property(name="a", value="2022-02-01")
+        .add_property(name="result", value="FAIL"),
+        db.Record(name='a')
+        .add_parent(name="A", id=3)
+        .add_property(name="a", value="2022-02-01")
+        .add_property(name="result", value="FAIL"),
+    ]:
+        se.update(SyncNode(el))
+
+    id_r0 = ident.get_identifiable(se, [])
+    assert "A" == id_r0.record_type
+    assert "2022-02-01" == id_r0.properties["a"]
+    assert 'a' == id_r0.name
+    assert len(id_r0.properties) == 1
+
+    rec = (db.Record(name='a')
+           .add_parent(name="A")
+           .add_property(name="a", value="2")
+           )
+    se = SyncNode(rec, ident.get_registered_identifiable(rec))
+    se.update(SyncNode(
+        db.Record(name='a')
+        .add_parent(name="A")
+        .add_property(name="a", value="3")
+    ))
+
+    with pytest.raises(RuntimeError):
+        id_r0 = ident.get_identifiable(se, [])
 
-@pytest.mark.xfail
+
+@pytest.mark.xfail
 def test_retrieve_identified_record_for_identifiable():
     # TODO modify this such that it becomes a test that acutally tests (sufficiently) the
     # retrieve_identified_record_for_identifiable function
@@ -190,7 +227,7 @@ def test_retrieve_identified_record_for_identifiable():
             r_cur = r
             break
 
-    id_r1 = ident.get_identifiable(r_cur)
+    id_r1 = ident.get_identifiable(r_cur, [])
     assert r_cur.parents[0].name == id_r1.record_type
     assert r_cur.get_property(
         "identifier").value == id_r1.properties["identifier"]
@@ -211,3 +248,19 @@ def test_retrieve_identified_record_for_identifiable():
     assert r_cur.get_property(
         "responsible").value == idr_r1.get_property("responsible").value
     assert r_cur.description == idr_r1.description
+
+
+@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+       new=Mock(side_effect=lambda x: [x]))
+def test_referencing_entity_has_appropriate_type():
+    dummy = db.Record().add_parent("A")
+    registered_identifiable = db.RecordType()
+    rft = IdentifiableAdapter.referencing_entity_has_appropriate_type
+    assert not rft([], registered_identifiable)
+    assert not rft(dummy.parents, registered_identifiable)
+    registered_identifiable.add_property("is_referenced_by", "B")
+    assert not rft(dummy.parents, registered_identifiable)
+    registered_identifiable.properties[0].value = ["B", "A"]
+    assert rft(dummy.parents, registered_identifiable)
+    registered_identifiable.properties[0].value = ["B", "*"]
+    assert rft(dummy.parents, registered_identifiable)
diff --git a/unittests/test_sync_graph.py b/unittests/test_sync_graph.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c63cb54aceeaef98df36630ba0873cd62ebf7e3
--- /dev/null
+++ b/unittests/test_sync_graph.py
@@ -0,0 +1,651 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+import logging
+from functools import partial
+from itertools import product
+from unittest.mock import MagicMock, Mock, patch
+
+import linkahead as db
+import pytest
+from test_crawler import basic_retrieve_by_name_mock_up, mock_get_entity_by
+
+from caoscrawler.crawl import Crawler
+from caoscrawler.exceptions import (ImpossibleMergeError,
+                                    MissingIdentifyingProperty)
+from caoscrawler.identifiable import Identifiable
+from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
+from caoscrawler.sync_graph import SyncGraph, _set_each_scalar_value
+from caoscrawler.sync_node import SyncNode, parent_in_list, property_in_list
+
+
+@pytest.fixture
+def simple_adapter():
+    # different RTs with different registered identifiables to allow testing various behaviors
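+    # RT1 is identified via its reference to an RT2; RT2 via being referenced by an RT1 or RT3;
+    # RT3 via its property "a"; RT4 via its reference to an RT3; RT5 via its name.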
+    ident_adapter = CaosDBIdentifiableAdapter()
+    ident_adapter.register_identifiable(
+        "RT1",
+        db.RecordType().add_parent("RT1").add_property("RT2"))
+    ident_adapter.register_identifiable(
+        "RT2",
+        db.RecordType().add_parent("RT2").add_property("is_referenced_by", ["RT1", "RT3"]))
+    ident_adapter.register_identifiable(
+        "RT3",
+        db.RecordType().add_parent("RT3").add_property("a"))
+    ident_adapter.register_identifiable(
+        "RT4",
+        db.RecordType().add_parent("RT4").add_property("RT3"))
+    ident_adapter.register_identifiable(
+        "RT5",
+        db.RecordType().add_parent("RT5").add_property("name"))
+    return ident_adapter
+
+
+def test_create_flat_list():
+    a = db.Record()
+    b = db.Record()
+    a.add_property(name="a", value=a)
+    a.add_property(name="b", value=b)
+    flat = SyncGraph._create_flat_list([a])
+    assert len(flat) == 2
+    assert a in flat
+    assert b in flat
+    c = db.Record()
+    c.add_property(name="a", value=a)
+    # This would cause a recursion error if it is not dealt with properly.
+    a.add_property(name="c", value=c)
+    flat = SyncGraph._create_flat_list([c])
+    assert len(flat) == 3
+    assert a in flat
+    assert b in flat
+    assert c in flat
+
+    # Test for lists:
+    a = db.Record()
+    b = db.Record()
+    d = db.Record()
+    a.add_property(name="a", value=a)
+    a.add_property(name="list", value=[b, d])
+    flat = SyncGraph._create_flat_list([a])
+    assert len(flat) == 3
+    assert a in flat
+    assert b in flat
+    assert d in flat
+
+    c = db.Record()
+    c.add_property(name="a", value=a)
+    # This would cause a recursion error if it is not dealt with properly.
+    a.add_property(name="second_list", value=[b, d, c])
+    flat = SyncGraph._create_flat_list([c])
+    assert len(flat) == 4
+    assert a in flat
+    assert b in flat
+    assert c in flat
+    assert d in flat
+
+
+@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+       new=Mock(side_effect=lambda x: [x]))
+def test_create_reference_mapping():
+    a = SyncNode(db.Record().add_parent("RT1"),
+                 db.RecordType().add_property("is_referenced_by", ["RT2"]))
+    b = SyncNode(db.Record(id=132).add_parent("RT2").add_property('a', a),
+                 db.RecordType().add_property("a"))
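+    # a is identified via a back reference from an RT2; b is identified via its property "a",
+    # which references a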
+    ses = [a, b]
+
+    mappings = SyncGraph._create_reference_mapping(ses)
+    # test initialization
+    for index, mapping in product((0, 1), mappings):
+        assert id(ses[index]) in mapping
+
+    (forward_references, backward_references, forward_references_id_props,
+     backward_references_id_props, forward_references_backref,
+     backward_references_backref) = mappings
+
+    # a has no ref
+    assert len(forward_references[id(a)]) == 0
+    assert backward_references[id(a)] == set([b])
+    # b does
+    assert forward_references[id(b)] == set([a])
+    assert backward_references[id(b)] == set()
+    # a has no identifying reference
+    assert forward_references_id_props[id(a)] == set()
+    assert backward_references_id_props[id(a)] == set([b])
+    # b has an identifying reference
+    assert forward_references_id_props[id(b)] == set([a])
+    assert backward_references_id_props[id(b)] == set()
+    # a has an identifying back reference
+    assert forward_references_backref[id(a)] == set()
+    assert backward_references_backref[id(a)] == set([b])
+    # b does not
+    assert forward_references_backref[id(b)] == set([a])
+    assert backward_references_backref[id(b)] == set()
+
+
+@patch("caoscrawler.sync_graph.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
+def test_SyncGraph_init():
+    # trivial case
+    a = db.Record(id=101).add_parent("A")
+    ident_a = db.RecordType().add_parent("A").add_property("prop_ident")
+    ident_adapter = CaosDBIdentifiableAdapter()
+    ident_adapter.register_identifiable("A", ident_a)
+    SyncGraph([a], ident_adapter)
+    SyncGraph([], ident_adapter)  # should not fail either...
+    # test whether missing identifying properties cause an exception
+    with pytest.raises(MissingIdentifyingProperty):
+        SyncGraph([db.Record().add_parent("A")], ident_adapter)
+
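+    # a mix of records that duplicate each other by ID (101), by path ('a'), or by the
+    # identifying property value ("MERGEME"); each group should be merged into a single node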
+    entlist = [
+        db.Record(id=101).add_parent("A"),
+        db.Record(id=102).add_parent("A"),
+        db.File(path='a').add_parent("A"),
+        db.File(path='b').add_parent("A"),
+        db.Record(id=103).add_parent("A"),
+        db.Record(id=104).add_parent("A").add_property(name='prop_ident', value="MERGEME"),
+        db.Record().add_parent("A").add_property(name='prop_ident', value="MERGEME"),
+        db.File(path='a', file='b').add_parent("A"),
+        db.Record(id=101).add_parent("A"),
+        db.Record().add_parent("A").add_property(name='prop_ident', value="other"),
+        db.Record().add_parent("A").add_property(name='prop_ident',
+                                                 value=db.Record().add_parent("A")
+                                                 .add_property(name='prop_ident', value="other")),
+        db.File(path='a', file='b').add_parent("A"),
+        db.Record(id=101).add_parent("A"),
+    ]
+    st = SyncGraph(entlist, ident_adapter)
+    # all nodes with ID=101 have been merged
+    assert len([el for el in st.nodes if el.id == 101]) == 1
+    # all nodes with path='a' have been merged
+    assert len([el for el in st.nodes if el.path == 'a']) == 1
+    # all nodes with ID or path were removed from unchecked
+    for el in st.nodes:
+        if el.id is not None or el.path is not None:
+            assert el not in st.unchecked
+    # all nodes with ID are in the ID lookup
+    for el in st.nodes:
+        if el.id is not None:
+            assert st._id_look_up[el.id] is el
+    # all nodes with path are in the path lookup
+    for el in st.nodes:
+        if el.path is not None:
+            assert st._path_look_up[el.path] is el
+    # all nodes with identifiable are in the identifiable lookup
+    for el in st.nodes:
+        if el.identifiable is not None:
+            assert st._identifiable_look_up[el.identifiable.get_representation()] is el
+    # The node that has no ID but has an identifiable was merged with another node with an ID
+    # (due to the shared identifiable)
+    new_one = [el for el in st.nodes if len(el.properties) > 0
+               and el.properties[0].value == "MERGEME"]
+    assert len(new_one) == 1
+    assert new_one[0].id == 104
+    # every node that does not rely on something unchecked has an identifiable or an ID
+    for el in st.nodes:
+        if not st._identity_relies_on_unchecked_entity(el):
+            assert el.identifiable is not None or el.id is not None
+
+
+@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+       new=Mock(side_effect=lambda x: [x]))
+def test_merge_into_trivial(simple_adapter):
+    # simplest case: a -> c
+    #                b
+    #                (a references c; b does not reference anything; a & b have the same target
+    #                 record)
+    c = db.Record(name='c').add_parent("RT2")
+    a = db.Record(name='a').add_parent("RT1").add_property('RT2', c)
+    b = db.Record(id=101).add_parent("RT1")
+
+    st = SyncGraph([a, b], simple_adapter)
+    se_a, se_b, se_c = st.nodes
+    assert se_a.name == 'a'
+    assert se_b.id == 101
+    assert se_c.name == 'c'
+
+    # CHECK REFERENCE MAP (before merge):
+    # c is referenced by a
+    assert len(st.forward_references[id(se_a)]) == 1
+    assert se_c in st.forward_references[id(se_a)]
+    assert len(st.forward_references[id(se_b)]) == 0
+    assert len(st.forward_references[id(se_c)]) == 0
+    assert len(st.backward_references[id(se_a)]) == 0
+    assert len(st.backward_references[id(se_b)]) == 0
+    assert len(st.backward_references[id(se_c)]) == 1
+    assert se_a in st.backward_references[id(se_c)]
+
+    assert len(st.forward_references_id_props[id(se_a)]) == 1
+    assert se_c in st.forward_references_id_props[id(se_a)]
+    assert len(st.forward_references_id_props[id(se_b)]) == 0
+    assert len(st.forward_references_id_props[id(se_c)]) == 0
+    assert len(st.backward_references_id_props[id(se_a)]) == 0
+    assert len(st.backward_references_id_props[id(se_b)]) == 0
+    assert len(st.backward_references_id_props[id(se_c)]) == 1
+    assert se_a in st.backward_references_id_props[id(se_c)]
+
+    assert len(st.forward_references_backref[id(se_a)]) == 1
+    assert se_c in st.forward_references_backref[id(se_a)]
+    assert len(st.forward_references_backref[id(se_b)]) == 0
+    assert len(st.forward_references_backref[id(se_c)]) == 0
+    assert len(st.backward_references_backref[id(se_a)]) == 0
+    assert len(st.backward_references_backref[id(se_b)]) == 0
+    assert len(st.backward_references_backref[id(se_c)]) == 1
+    assert se_a in st.backward_references_backref[id(se_c)]
+
+    st.set_id_of_node(se_a, 101)
+
+    # CHECK REFERENCE MAP (after merge):
+    # c is now referenced by b
+    assert id(se_a) not in st.forward_references
+    assert len(st.forward_references[id(se_b)]) == 1
+    assert se_c in st.forward_references[id(se_b)]
+    assert len(st.forward_references[id(se_c)]) == 0
+    assert id(se_a) not in st.backward_references
+    assert len(st.backward_references[id(se_b)]) == 0
+    assert len(st.backward_references[id(se_c)]) == 1
+    assert se_b in st.backward_references[id(se_c)]
+
+    assert id(se_a) not in st.forward_references_id_props
+    assert len(st.forward_references_id_props[id(se_b)]) == 1
+    assert se_c in st.forward_references_id_props[id(se_b)]
+    assert len(st.forward_references_id_props[id(se_c)]) == 0
+    assert id(se_a) not in st.backward_references_id_props
+    assert len(st.backward_references_id_props[id(se_b)]) == 0
+    assert len(st.backward_references_id_props[id(se_c)]) == 1
+    assert se_b in st.backward_references_id_props[id(se_c)]
+
+    assert id(se_a) not in st.forward_references_backref
+    assert len(st.forward_references_backref[id(se_b)]) == 1
+    assert se_c in st.forward_references_backref[id(se_b)]
+    assert len(st.forward_references_backref[id(se_c)]) == 0
+    assert id(se_a) not in st.backward_references_backref
+    assert len(st.backward_references_backref[id(se_b)]) == 0
+    assert len(st.backward_references_backref[id(se_c)]) == 1
+    assert se_b in st.backward_references_backref[id(se_c)]
+
+
+@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+       new=Mock(side_effect=lambda x: [x]))
+def test_merge_into_simple(simple_adapter):
+    # simple case: a -> c <- b (a & b reference c; a & b have the same target record)
+    c = db.Record(name='c').add_parent("RT2")
+    a = db.Record().add_parent("RT1").add_property('RT2', c)
+    b = db.Record().add_parent("RT1").add_property('RT2', c)
+
+    st = SyncGraph([a, b], simple_adapter)
+    se_a = st.nodes[0]
+    se_b = st.nodes[1]
+    se_c = st.nodes[2]
+
+    # CHECK REFERENCE MAP:
+    # c is referenced by a & b
+    assert len(st.forward_references[id(se_a)]) == 1
+    assert se_c in st.forward_references[id(se_a)]
+    assert len(st.forward_references[id(se_b)]) == 1
+    assert se_c in st.forward_references[id(se_b)]
+    assert len(st.forward_references[id(se_c)]) == 0
+    assert len(st.backward_references[id(se_a)]) == 0
+    assert len(st.backward_references[id(se_b)]) == 0
+    assert len(st.backward_references[id(se_c)]) == 2
+    assert se_a in st.backward_references[id(se_c)]
+    assert se_b in st.backward_references[id(se_c)]
+
+    assert len(st.forward_references_id_props[id(se_a)]) == 1
+    assert se_c in st.forward_references_id_props[id(se_a)]
+    assert len(st.forward_references_id_props[id(se_b)]) == 1
+    assert se_c in st.forward_references_id_props[id(se_b)]
+    assert len(st.forward_references_id_props[id(se_c)]) == 0
+    assert len(st.backward_references_id_props[id(se_a)]) == 0
+    assert len(st.backward_references_id_props[id(se_b)]) == 0
+    assert len(st.backward_references_id_props[id(se_c)]) == 2
+    assert se_a in st.backward_references_id_props[id(se_c)]
+    assert se_b in st.backward_references_id_props[id(se_c)]
+
+    assert len(st.forward_references_backref[id(se_a)]) == 1
+    assert se_c in st.forward_references_backref[id(se_a)]
+    assert len(st.forward_references_backref[id(se_b)]) == 1
+    assert se_c in st.forward_references_backref[id(se_b)]
+    assert len(st.forward_references_backref[id(se_c)]) == 0
+    assert len(st.backward_references_backref[id(se_a)]) == 0
+    assert len(st.backward_references_backref[id(se_b)]) == 0
+    assert len(st.backward_references_backref[id(se_c)]) == 2
+    assert se_a in st.backward_references_backref[id(se_c)]
+    assert se_b in st.backward_references_backref[id(se_c)]
+
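+    # merge a into b: b remains in the graph and takes over a's references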
+    st._merge_into(se_a, se_b)
+
+    # CHECK REFERENCE MAP (after merge):
+    # c is now referenced by b
+    # (same situation as above)
+    assert id(se_a) not in st.forward_references
+    assert len(st.forward_references[id(se_b)]) == 1
+    assert se_c in st.forward_references[id(se_b)]
+    assert len(st.forward_references[id(se_c)]) == 0
+    assert id(se_a) not in st.backward_references
+    assert len(st.backward_references[id(se_b)]) == 0
+    assert len(st.backward_references[id(se_c)]) == 1
+    assert se_b in st.backward_references[id(se_c)]
+
+    assert id(se_a) not in st.forward_references_id_props
+    assert len(st.forward_references_id_props[id(se_b)]) == 1
+    assert se_c in st.forward_references_id_props[id(se_b)]
+    assert len(st.forward_references_id_props[id(se_c)]) == 0
+    assert id(se_a) not in st.backward_references_id_props
+    assert len(st.backward_references_id_props[id(se_b)]) == 0
+    assert len(st.backward_references_id_props[id(se_c)]) == 1
+    assert se_b in st.backward_references_id_props[id(se_c)]
+
+    assert id(se_a) not in st.forward_references_backref
+    assert len(st.forward_references_backref[id(se_b)]) == 1
+    assert se_c in st.forward_references_backref[id(se_b)]
+    assert len(st.forward_references_backref[id(se_c)]) == 0
+    assert id(se_a) not in st.backward_references_backref
+    assert len(st.backward_references_backref[id(se_b)]) == 0
+    assert len(st.backward_references_backref[id(se_c)]) == 1
+    assert se_b in st.backward_references_backref[id(se_c)]
+
+
+@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+       new=Mock(side_effect=lambda x: [x]))
+def test_backward_references_backref():
+    # We use the reference as identifying reference in both directions. Thus the mapping is the
+    # same for all three categories: plain references, identifying-property references, and
+    # identifying back references
+    ident_a = db.RecordType().add_parent("BR").add_property("name")
+    ident_b = db.RecordType().add_parent("C").add_property("is_referenced_by", ["BR"])
+    ident_adapter = CaosDBIdentifiableAdapter()
+    ident_adapter.register_identifiable("BR", ident_a)
+    ident_adapter.register_identifiable("C", ident_b)
+
+    referenced = db.Record(name="B").add_parent("C")
+    ent_list = [referenced, db.Record(name="A").add_parent("BR").add_property("ref", referenced), ]
+
+    st = SyncGraph(ent_list, ident_adapter)
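+    # st.nodes[0] is the record "B" (type C), which is identified via the back reference from
+    # the record "A" (type BR)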
+    assert st.nodes[1] in st.backward_references_backref[id(st.nodes[0])]
+
+
+@patch("caoscrawler.identifiable_adapters.get_children_of_rt",
+       new=Mock(side_effect=lambda x: [x]))
+def test_set_id_of_node(simple_adapter):
+    # setting the id should lead to the node being marked as existing
+    ent_list = [db.Record(name='a').add_parent("RT5")]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert len(st.nodes) == 1
+    assert len(st.unchecked) == 1
+    st.set_id_of_node(st.unchecked[0], 101)
+    assert len(st.nodes) == 1
+    assert len(st.unchecked) == 0
+    assert id(st.nodes[0]) in st._existing
+
+    # setting the id to None should lead to the node being marked as missing
+    ent_list = [db.Record().add_parent("RT1").add_property(name="RT2", value=1)]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert len(st.nodes) == 1
+    assert len(st.unchecked) == 1
+    # the identifiable is automatically set during initialization of the graph
+    assert st.nodes[0].identifiable is not None
+    st.set_id_of_node(st.unchecked[0])
+    assert len(st.nodes) == 1
+    assert len(st.unchecked) == 0
+    assert id(st.nodes[0]) in st._missing
+
+    # setting the id to one that already exists should lead to a merge
+    ent_list = [
+        db.Record(id=101).add_parent("RT5"),
+        db.Record(name='a').add_parent("RT5").add_property(name="RT2", value=1)]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert len(st.nodes) == 2
+    assert len(st.unchecked) == 1
+    st.set_id_of_node(st.unchecked[0], 101)
+    assert len(st.nodes) == 1
+    assert len(st.unchecked) == 0
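+    # the merged node keeps the property from the named record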
+    assert st.nodes[0].properties[0].name == "RT2"
+
+    # setting the id to None should lead to depending nodes being marked as missing
+    ent_list = [
+        db.Record().add_parent("RT3").add_property(name="a", value=1).add_property(
+            name="RT2", value=db.Record().add_parent("RT2")),
+    ]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert len(st.nodes) == 2
+    assert len(st.unchecked) == 2
+    st.set_id_of_node(st.unchecked[0])
+    assert len(st.nodes) == 2
+    assert len(st.unchecked) == 0
+    assert id(st.nodes[0]) in st._missing
+    assert id(st.nodes[1]) in st._missing
+
+    # same as above but with backref
+    ent_list = [
+        db.Record()
+        .add_parent("RT4")
+        .add_property(name="RT3",
+                      value=db.Record().add_parent("RT3").add_property(name="a", value=1)),
+    ]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert len(st.nodes) == 2
+    assert len(st.unchecked) == 2
+    assert st.unchecked[1].identifiable is not None
+    st.set_id_of_node(st.unchecked[1])
+    assert len(st.nodes) == 2
+    assert len(st.unchecked) == 0
+    assert id(st.nodes[0]) in st._missing
+    assert id(st.nodes[1]) in st._missing
+
+    # setting an id might allow checking another node that depends on the former
+    ent_list = [
+        db.Record()
+        .add_parent("RT4")
+        .add_property(name="RT3",
+                      value=db.Record().add_parent("RT3").add_property(name="a", value=1)),
+    ]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert st.nodes[0].identifiable is None
+    assert st.nodes[1].identifiable is not None
+    st.set_id_of_node(st.unchecked[1], 111)
+    assert st.nodes[0].identifiable is not None
+    assert st.nodes[1].identifiable is not None
+
+    # same as above but going one step further: the new identifiable allows merging that node
+    ent_list = [
+        (db.Record()
+         .add_parent("RT4")
+         .add_property(name="RT3",
+                       value=db.Record().add_parent("RT3").add_property(name="a", value=1))),
+
+        (db.Record()
+         .add_parent("RT4")
+         .add_property(name="RT3", value=111))
+    ]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert st.nodes[0].identifiable is None
+    assert st.nodes[1].identifiable is not None
+    assert st.nodes[2].identifiable is not None
+    assert len(st.nodes) == 3
+    st.set_id_of_node(st.unchecked[2], 111)
+    assert st.nodes[0].identifiable is not None
+    assert len(st.nodes) == 2
+
+
+@patch("caoscrawler.sync_graph.cached_get_entity_by",
+       new=Mock(side_effect=mock_get_entity_by))
+def test_merging(simple_adapter):
+    # identifying information can be given at various locations in the hierarchical tree
+    # test whether an object is correctly combined for all cases
+    ident_adapter = CaosDBIdentifiableAdapter()
+    ident_a = db.RecordType().add_parent("A").add_property("name").add_property("a")
+    ident_adapter.register_identifiable("A", ident_a)
+    ident_adapter.retrieve_identified_record_for_identifiable = Mock(
+        side_effect=partial(
+            basic_retrieve_by_name_mock_up, known={"A": db.Record(id=1111, name="A")}))
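+    # simulate a remote server that knows only a single Record with name "A" (id 1111)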
+
+    # merging based on id
+    ent_list = [
+        db.Record(id=101).add_parent("A"),
+        db.Record(id=101).add_parent("A")]
+    st = SyncGraph(ent_list, ident_adapter)
+    assert len(st.nodes) == 1
+    assert len(st.unchecked) == 0
+    assert 101 == st.nodes[0].id
+    assert "A" == st.nodes[0].parents[0].name
+
+    # merging based on path
+    ent_list = [
+        db.File(path='101').add_parent("A"),
+        db.File(path='101').add_parent("A")]
+    st = SyncGraph(ent_list, ident_adapter)
+    assert len(st.nodes) == 1
+    assert len(st.unchecked) == 0
+    assert '101' == st.nodes[0].path
+    assert "A" == st.nodes[0].parents[0].name
+
+    # merging based on identifiable (non-identifying properties are ignored)
+    ent_list = [
+        db.File(name='101').add_parent("A").add_property('a', value=1).add_property('b', value=1),
+        db.File(name='101').add_parent("A").add_property('a', value=1).add_property('b', value=2)]
+    st = SyncGraph(ent_list, ident_adapter)
+    assert len(st.nodes) == 1
+    assert st.nodes[0].id is None
+    assert '101' == st.nodes[0].name
+    assert "A" == st.nodes[0].parents[0].name
+    assert 1 == st.nodes[0].properties[0].value
+    assert "a" == st.nodes[0].properties[0].name
+
+    # Merging a mix: one Record needs the identifiable to be merged, while the identifying
+    # information is scattered over the other records.
+    ent_list = [
+        db.Record(id=101).add_parent("A"),
+        db.Record(id=101, name='a').add_parent("A"),
+        db.Record(id=101).add_parent("A").add_property('a', value=1),
+        db.Record(name='a').add_parent("A").add_property('a', value=1)]
+
+    st = SyncGraph(ent_list, ident_adapter)
+    assert len(st.nodes) == 1
+    assert len(st.unchecked) == 0
+    assert 'a' == st.nodes[0].name
+    assert "A" == st.nodes[0].parents[0].name
+    assert 1 == st.nodes[0].properties[0].value
+    assert "a" == st.nodes[0].properties[0].name
+    assert 101 == st.nodes[0].id
+
+    # test that adding an ID can lead to a cascade of merges
+    # This also tests whether setting something to missing allows creating an identifiable
+    # and thus allows a merge
+    subtree = db.Record(name='a').add_parent("A").add_property('a', value=db.Record(
+        name='b').add_parent("A").add_property('a', value=db.Record(
+            name='c').add_parent("A").add_property('a', value="missing")))
+    ent_list = [
+        db.Record(id=101).add_parent("A"),
+        db.Record(id=101, name='z').add_parent("A"),
+        db.Record(id=101).add_parent("A").add_property('a', value=subtree),
+        db.Record(name='z').add_parent("A").add_property('a', value=subtree),
+    ]
+
+    st = SyncGraph(ent_list, ident_adapter)
+    assert len(st.nodes) == 5
+    assert len(st.unchecked) == 4
+    missing_one = [el for el in st.nodes if el.name == 'c'][0]
+    st.set_id_of_node(missing_one)
+    # setting c to missing means that b cannot exist, which means that a cannot exist; this
+    # allows the two z nodes to be merged
+    assert len(st.nodes) == 4
+    assert len(st.unchecked) == 0
+
+
+def test_update_of_reference_values(simple_adapter):
+    # multiple nodes are merged, including one that is referenced
+    # assure that the value of the property of the referencing node is still updated when the ID
+    # is set (the value object is replaced appropriately)
+    a = db.Record().add_parent("RT3").add_property('a', value=1)
+    ent_list = [
+        a,
+        db.Record().add_parent("RT3").add_property('a', value=1),
+        db.Record().add_parent("RT3").add_property('a', value=1),
+        db.Record().add_parent("RT3").add_property('a', value=1),
+        db.Record().add_parent("RT3").add_property('a', value=1),
+        db.Record().add_parent("RT4").add_property('RT3', value=a),
+        db.Record().add_parent("RT3").add_property('a', value=1),
+        db.Record().add_parent("RT3").add_property('a', value=1)]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert len(st.nodes) == 2
+    assert len(st.unchecked) == 2
+    assert 'RT4' == st.nodes[1].parents[0].name
+    st.set_id_of_node(st.nodes[0], 101)
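+    # the property value of the referencing RT4 node was updated and now carries the new ID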
+    b_prop = st.nodes[1].properties[0].value
+    assert b_prop.id == 101
+
+
+def test_ignoring_irrelevant_references(simple_adapter):
+    # make sure that a cycle of references is no problem if one reference is not identifying
+    b = db.Record(name='b').add_parent("RT5")
+    a = db.Record().add_parent("RT3").add_property('a', value=b)
+    b.add_property('a', value=a)
+    ent_list = [a, b]
+    st = SyncGraph(ent_list, simple_adapter)
+    assert len(st.nodes) == 2
+    assert len(st.unchecked) == 2
+    assert st.nodes[1].name == 'b'
+
+    # a relies on b
+    assert st._identity_relies_on_unchecked_entity(st.nodes[0])
+    # b relies on nothing
+    assert not st._identity_relies_on_unchecked_entity(st.nodes[1])
+    # set ID of b
+    st.set_id_of_node(st.nodes[1], 101)
+    assert len(st.unchecked) == 1
+    # now a no longer relies on unchecked entities
+    assert not st._identity_relies_on_unchecked_entity(st.nodes[0])
+
+
+
+@pytest.mark.xfail(reason="implementation insufficient")
+def test_detect_circular_dependency(crawler_mocked_identifiable_retrieve, caplog):
+    crawler = crawler_mocked_identifiable_retrieve
+    crawler.identifiableAdapter.get_registered_identifiable = Mock(
+        side_effect=lambda x: db.Record().add_parent('C').add_property(name='C'))
+    a = db.Record(name='a').add_parent("C")
+    b = db.Record(name='b').add_parent("C").add_property(name="C", value=a)
+    c = db.Record(name='c').add_parent("C").add_property(name='D', value='e'
+                                                         ).add_property(name="C", value=b)
+    d = db.Record(name='c').add_parent("C")
+    a.add_property(name="C", value=c)
+    flat = [a, b, c]
+    circle = Crawler.detect_circular_dependency(flat)
+    assert [id(el) for el in circle] == [id(el) for el in [a, c, b, a]]
+
+    assert Crawler.detect_circular_dependency([d]) is None
+    st = SyncGraph(flat, crawler.identifiableAdapter)
+    with pytest.raises(RuntimeError):
+        _, _ = crawler.split_into_inserts_and_updates(st)
+    caplog.set_level(logging.ERROR, logger="caoscrawler.converters")
+    assert "Found circular dependency" in caplog.text
+    assert "\n--------\n\n> Parent: C\n\n>> Name: a\n[\'C\']" in caplog.text
+    caplog.clear()
+
+
+def test_set_each_scalar_value():
+    """Test whether properties with None as value are treated appropriately."""
+    a = SyncNode(db.Record().add_parent("RT1").add_property(name="bla"),
+                 db.RecordType().add_property("is_referenced_by", ["RT2"]))
+    _set_each_scalar_value(a, lambda x: False, None)
+    _set_each_scalar_value(a, lambda x: isinstance(x, SyncNode), None)
+    _set_each_scalar_value(a, lambda x: x is None, lambda x: 42)
+    assert a.properties[0].value == 42
+    _set_each_scalar_value(a, lambda x: x == 42, lambda x: None)
+    assert a.properties[0].value is None
diff --git a/unittests/test_sync_node.py b/unittests/test_sync_node.py
new file mode 100644
index 0000000000000000000000000000000000000000..668a53470d028dfcfce7bb5785d68b685b034595
--- /dev/null
+++ b/unittests/test_sync_node.py
@@ -0,0 +1,347 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+#
+# This file is a part of the LinkAhead Project.
+#
+# Copyright (C) 2024 Indiscale GmbH <info@indiscale.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+from functools import partial
+from unittest.mock import MagicMock, Mock, patch
+
+import linkahead as db
+import pytest
+from caoscrawler.exceptions import ImpossibleMergeError
+from caoscrawler.identifiable import Identifiable
+from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
+from caoscrawler.sync_graph import SyncGraph
+from caoscrawler.sync_node import SyncNode, parent_in_list, property_in_list
+
+from test_crawler import basic_retrieve_by_name_mock_up, mock_get_entity_by
+
+
+def assert_parents_equal(p1, p2):
+    """Special assertion for comparing parents."""
+    assert len(p1) == len(p2)  # zip would silently truncate otherwise
+    for a, b in zip(p1, p2):
+        assert a.id == b.id
+        assert a.name == b.name
+
+
+def assert_properties_equal(p1, p2):
+    """Special assertion for comparing properties."""
+    assert len(p1) == len(p2)  # zip would silently truncate otherwise
+    for a, b in zip(p1, p2):
+        assert a.id == b.id
+        assert a.name == b.name
+        assert a.value == b.value
+        assert a.datatype == b.datatype
+
+
+def test_sync_node():
+    # initialization
+    rec = (db.Record(id=101, name='101')
+           .add_parent("A")
+           .add_parent("B")
+           .add_parent(id=102)
+           .add_property(name="a", value='a')
+           .add_property(id=103, value='b'))
+    rec.description = "hallo"
+    sna = SyncNode(rec)
+    # check information stored in initialized SyncNode
+    assert "Record" in str(sna)
+    assert sna.id == rec.id
+    assert sna.role == rec.role
+    assert sna.name == rec.name
+    assert sna.description == rec.description
+    assert_parents_equal(sna.parents, rec.parents)
+    assert_properties_equal(sna.properties, rec.properties)
+    # ... special case File (path and file attributes)
+    fi = db.File(id=101, name='101', path='/a/')
+    snb = SyncNode(fi)
+    assert snb.role == fi.role
+    assert snb.name == fi.name
+    assert snb.id == fi.id
+    assert snb.path == fi.path
+    assert snb.file == fi.file
+
+    # check information in exported db.Entity
+    export = sna.export_entity()
+    assert export.id == rec.id
+    assert export.role == rec.role
+    assert export.name == rec.name
+    assert export.description == rec.description
+    assert_parents_equal(export.parents, rec.parents)
+    assert_properties_equal(export.properties, rec.properties)
+    export = snb.export_entity()
+    assert export.role == fi.role
+    assert export.name == fi.name
+    assert export.id == fi.id
+    assert export.path == fi.path
+    assert export.file == fi.file
+
+    # merge no common information
+    # ---------------------------
+    rec_a = (db.Record(name='101')
+             .add_parent("A")
+             .add_parent(id=102)
+             .add_property(name="a", value='a')
+             .add_property(id=103, value='b'))
+
+    rec_b = (db.Record(id=101)
+             .add_parent("B")
+             .add_parent(id=103)
+             .add_property(name="a", value='a')
+             .add_property(id=103, value='b'))
+    rec_b.description = "tja"
+
+    sn_a = SyncNode(rec_a)
+    sn_b = SyncNode(rec_b)
+    sn_a.update(sn_b)
+    # test information in updated node
+    assert sn_a.id == rec_b.id
+    assert sn_a.role == rec_a.role
+    assert sn_a.name == rec_a.name
+    assert sn_a.description == rec_b.description
+    for p in rec_a.parents + rec_b.parents:
+        assert p in sn_a.parents
+    for p in rec_a.properties + rec_b.properties:
+        assert p in sn_a.properties
+    # Check for duplicated property:
+    ps = [p for p in sn_a.properties if p.name == "a"]
+    assert len(ps) == 2
+    assert ps[0].value == "a"
+    assert ps[1].value == "a"
+
+    # test information in exported entity
+    export = sn_a.export_entity()
+    assert export.id == rec_b.id
+    assert export.name == rec_a.name
+    for p in rec_a.parents + rec_b.parents:
+        assert parent_in_list(p, export.parents)
+    for p in rec_a.properties + rec_b.properties:
+        if p.name is not None:
+            assert p.name in [el.name for el in export.properties]
+        if p.id is not None:
+            assert p.id in [el.id for el in export.properties]
+    assert len(export.properties) == 2
+    assert export.get_property('a').value == 'a'
+    assert export.get_property(103).value == 'b'
+    assert export.description == rec_b.description
+    assert export.role == rec_a.role
+
+    # merge with common information
+    # -----------------------------
+    rec_a = (db.Record(id=101, name='101')
+             .add_parent("A")
+             .add_parent(id=102)
+             .add_property(name="a", value='a'))
+
+    rec_b = (db.Record(id=101, name='101')
+             .add_parent("A")
+             .add_parent(id=102)
+             .add_property(name="a", value='a'))
+
+    sn_a = SyncNode(rec_a)
+    sn_b = SyncNode(rec_b)
+    sn_a.update(sn_b)
+    assert sn_a.id == rec_b.id
+    assert sn_a.name == rec_a.name
+    for p in rec_a.parents + rec_b.parents:
+        assert parent_in_list(p, sn_a.parents)
+    for p in rec_a.properties + rec_b.properties:
+        assert property_in_list(p, sn_a.properties)
+    assert sn_a.description == rec_b.description
+    assert sn_a.role == rec_a.role
+
+    # merge with conflicting information
+    # ----------------------------------
+    # ID mismatch
+    sn_a = SyncNode(db.Record(id=102))
+    with pytest.raises(ImpossibleMergeError, match="Trying to update"):
+        sn_a.update(SyncNode(db.Record(id=101)))
+
+    # name mismatch
+    sn_a = SyncNode(db.Record(name='102'))
+    with pytest.raises(ImpossibleMergeError, match="Trying to update"):
+        sn_a.update(SyncNode(db.Record(name='101')))
+
+    # type mismatch
+    sn_a = SyncNode(db.Record(name='102'))
+    with pytest.raises(ImpossibleMergeError, match="Trying to update"):
+        sn_a.update(SyncNode(db.File(name='102')))
+
+    # description mismatch
+    sn_a = SyncNode(db.Record(description='102'))
+    with pytest.raises(ImpossibleMergeError, match="Trying to update"):
+        sn_a.update(SyncNode(db.Record(description='101')))
+
+    # path mismatch
+    sn_a = SyncNode(db.File(path='102'))
+    with pytest.raises(ImpossibleMergeError, match="Trying to update"):
+        sn_a.update(SyncNode(db.File(path='101')))
+
+    # identifiable mismatch
+    sn_a = SyncNode(db.File(path='102'))
+    sn_a.identifiable = Identifiable(name='a')
+    sn_b = SyncNode(db.File(path='101'))
+    sn_b.identifiable = Identifiable(name='b')
+    with pytest.raises(ValueError, match="identifiable"):
+        sn_a.update(sn_b)
+
+
+def test_export_node():
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_parent(id=103)
+             .add_property(name="a", value=[SyncNode(db.Record())])
+             .add_property(name='b', id=103, value='b'))
+
+    sn_a = SyncNode(rec_a)
+    exp = sn_a.export_entity()
+    assert exp.id == rec_a.id
+    assert exp.name == rec_a.name
+    for p in rec_a.parents:
+        assert len([el for el in exp.parents if p.name == el.name]) == 1
+    for p in rec_a.properties:
+        assert p.value == exp.get_property(p.name).value
+        if isinstance(p.value, list):
+            assert len(p.value) == len(exp.get_property(p.name).value)
+    assert len(exp.properties) == len(rec_a.properties)
+    assert len(exp.parents) == len(rec_a.parents)
+
+    # ---------------------------------------------------------------------------------------------
+    # NOTE: In the following we create a SyncNode object from an Entity that carries the same
+    # Property twice. This is shorthand for a SyncNode that was created from one Entity with such
+    # a Property and was then updated with another SyncNode that also has that Property.
+    # ---------------------------------------------------------------------------------------------
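+    # A rough sketch of the equivalent update sequence (values illustrative only):
+    #     sn = SyncNode(db.Record().add_property(name="a", value="a"))
+    #     sn.update(SyncNode(db.Record().add_property(name="a", value="b")))
+    # would leave ``sn`` with two properties named "a", just like the records built below.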
+
+    # same property name, different values
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value='b')
+             .add_property(name="a", value='a'))
+
+    # there should be a warning when multiproperties are used
+    with pytest.warns(UserWarning) as caught:
+        SyncNode(rec_a)
+    messages = {str(w.message) for w in caught}
+    assert "Multiproperties are not supported by the crawler." in messages
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
+
+    # SyncNodes with the same ID are considered equal
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=SyncNode(db.Record(id=1)))
+             .add_property(name="a", value=SyncNode(db.Record(id=1))))
+
+    exp = SyncNode(rec_a).export_entity()
+    assert exp.get_property('a').value.id == 1
+    # on export, equal multiproperties are collapsed into a single property
+    assert len([p for p in exp.properties if p.name == "a"]) == 1
+
+    # the same SyncNode object is trivially equal to itself
+    sn = SyncNode(db.Record(id=1))
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=sn)
+             .add_property(name="a", value=sn))
+
+    exp = SyncNode(rec_a).export_entity()
+    assert exp.get_property('a').value.id == 1
+    assert len([p for p in exp.properties if p.name == "a"]) == 1
+
+    # different SyncNode objects (without an ID) are not equal
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=SyncNode(db.Record()))
+             .add_property(name="a", value=SyncNode(db.Record())))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
+
+    # different SyncNode objects with differing IDs are not equal
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=SyncNode(db.Record(id=1)))
+             .add_property(name="a", value=SyncNode(db.Record(id=2))))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
+
+    # SyncNodes with the same IDs are considered equal (list)
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=[SyncNode(db.Record(id=1)), SyncNode(db.Record(id=2))])
+             .add_property(name="a", value=[SyncNode(db.Record(id=1)), SyncNode(db.Record(id=2))]))
+
+    exp = SyncNode(rec_a).export_entity()
+    assert exp.get_property('a').value[0].id == 1
+    assert len([p for p in exp.properties if p.name == "a"]) == 1
+
+    # lists of SyncNodes with the same IDs are not equal when the order differs
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=[SyncNode(db.Record(id=1)), SyncNode(db.Record(id=2))])
+             .add_property(name="a", value=[SyncNode(db.Record(id=2)), SyncNode(db.Record(id=1))]))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
+
+    # the same SyncNode object is trivially equal to itself (list)
+    sn = SyncNode(db.Record(id=1))
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=[sn])
+             .add_property(name="a", value=[sn]))
+
+    exp = SyncNode(rec_a).export_entity()
+    assert exp.get_property('a').value[0].id == 1
+
+    # different SyncNode objects (without an ID) are not equal (list)
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=[SyncNode(db.Record())])
+             .add_property(name="a", value=[SyncNode(db.Record())]))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
+
+    # different SyncNode objects with differing IDs are not equal (list)
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=[SyncNode(db.Record(id=1))])
+             .add_property(name="a", value=[SyncNode(db.Record(id=2))]))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
+
+    # list vs no list
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=SyncNode(db.Record(id=1)))
+             .add_property(name="a", value=[SyncNode(db.Record(id=1))]))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()
+
+    # different list sizes
+    rec_a = (db.Record(id=101)
+             .add_parent("B")
+             .add_property(name="a", value=[SyncNode(db.Record(id=1))])
+             .add_property(name="a", value=[SyncNode(db.Record(id=1)), SyncNode(db.Record(id=1))]))
+
+    with pytest.raises(ImpossibleMergeError):
+        exp = SyncNode(rec_a).export_entity()