diff --git a/integrationtests/README.md b/integrationtests/README.md
index c1f96606a46de4dd96f90fd4a1e46957100e68b3..5c308f51a332d5a930f91eb30f0d93032ae47627 100644
--- a/integrationtests/README.md
+++ b/integrationtests/README.md
@@ -1,3 +1,3 @@
-1. Clear database
-2. Insert model
+1. Clear database (see clear_database.py)
+2. Insert model (see insert_model.py)
 3. Run test.py
diff --git a/integrationtests/model.yml b/integrationtests/model.yml
index 7d78ac7ef4bc792f54594b29a8ac311479f41a59..055c4fb5b7894c3f444859d15ad1dbc806fa3fab 100644
--- a/integrationtests/model.yml
+++ b/integrationtests/model.yml
@@ -80,9 +80,6 @@ Report:
   - Publication
 hdf5File:
   datatype: REFERENCE
-extern:
-  - TestRT1
-  - TestP1
 Measurement:
   recommended_properties:
     date:
diff --git a/src/newcrawler/crawl.py b/src/newcrawler/crawl.py
index 91b7ddae14caca3243ff35c39f70c88bc5bcdf52..db0bbddb51bb60cfe2af214172e91635412c15e5 100644
--- a/src/newcrawler/crawl.py
+++ b/src/newcrawler/crawl.py
@@ -300,8 +300,7 @@ class Crawler(object):
                     # information
                     # Update an (local) identified record that will be inserted
                     newrecord = self.get_identified_record_from_local_cache(record)
-                    self.copy_attributes(
-                        fro=record, to=newrecord)
+                    self.copy_attributes(fro=record, to=newrecord)
                     # Bend references to the other object
                     # TODO refactor this
                     for el in flat + to_be_inserted + to_be_updated:
@@ -333,7 +332,7 @@ class Crawler(object):
                         record.id = identified_record.id
                         to_be_updated.append(record)
                         # TODO think this through
-                        # self.add_identified_record_to_local_cache(record)
+                        self.add_identified_record_to_local_cache(record)
                         del flat[i]
                     resolved_references = True
 
@@ -391,7 +390,7 @@ class Crawler(object):
                         attr_val = comp[0]["properties"][key][attribute]
                         other_attr_val = (comp[1]["properties"][key][attribute]
                                           if attribute in comp[1]["properties"][key] else None)
-                        if attr_val is not None and atrr_val != other_attr_val:
+                        if attr_val is not None and attr_val != other_attr_val:
                             identical = False
                             break
 
diff --git a/src/newcrawler/identifiable_adapters.py b/src/newcrawler/identifiable_adapters.py
index f11a7fc101225db4fd3bdd15f3ad397425930d08..89b8d4e19c0f28cd51085f1b131e37a1f17b0ae4 100644
--- a/src/newcrawler/identifiable_adapters.py
+++ b/src/newcrawler/identifiable_adapters.py
@@ -26,7 +26,7 @@
 from datetime import datetime
 import caosdb as db
 from abc import abstractmethod
-from .utils import get_value, has_parent
+from .utils import has_parent
 from caosdb.common.datatype import is_reference
 from .utils import has_parent
 
diff --git a/unittests/test_converters.py b/unittests/test_converters.py
index 3ec1764631c4de7b5a7cc247cc559d0dc5f5939c..b9a730c226bb25fd97b23cd39f61d2237758de91 100644
--- a/unittests/test_converters.py
+++ b/unittests/test_converters.py
@@ -31,6 +31,7 @@ from newcrawler.converters import Converter
 from newcrawler.stores import GeneralStore
 from newcrawler.converters import MarkdownFileConverter
 from newcrawler.structure_elements import Directory
+from newcrawler.structure_elements import File, DictTextElement, DictListElement
 
 from test_tool import rfp
 
diff --git a/unittests/test_tool.py b/unittests/test_tool.py
index 4b55de8749d3651e4e21482bd903c7da62a96d58..51774720e1ce6bda53575c0dfc98c04856fedf7a 100755
--- a/unittests/test_tool.py
+++ b/unittests/test_tool.py
@@ -40,6 +40,7 @@ def crawler():
                             rfp("scifolder_cfood.yml"))
     return crawler
 
+
 @pytest.fixture
 def ident(crawler):
     ident = LocalStorageIdentifiableAdapter()
@@ -352,13 +353,13 @@ def test_split_into_inserts_and_updates_single(mock_retrieve):
 
 def test_split_into_inserts_and_updates_with_duplicate(mock_retrieve):
     crawler = mock_retrieve
-    # try it with a reference
     a = db.Record(name="A").add_parent("C")
     b = db.Record(name="B").add_parent("C")
     b.add_property("A", a)
+    # This is identical to a and should be removed
     c = db.Record(name="A").add_parent("C")
     entlist = [a, b, c]
-    insert, update = crawler.split_into_inserts_and_updates(entlist)
+    insert, update = crawler.split_into_inserts_and_updates(deepcopy(entlist))
     assert len(insert) == 1
     assert insert[0].name == "B"
     assert len(update) == 1
@@ -412,6 +413,7 @@ def test_split_into_inserts_and_updates_with_complex(mock_retrieve):
     # TODO write test where the unresoled entity is not part of the identifiable
 
 
+@pytest.mark.xfail
 def test_split_into_inserts_and_updates_with_copy_attr(mock_retrieve):
     crawler = mock_retrieve
     # assume identifiable is only the name