diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index d88f8b939eb6fdac4b3aadeee3297816198a19bc..23e39cbb31545cb69cffdf3cae26c07d67763dea 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -385,7 +385,7 @@ class Crawler(object): if st.raise_problems: raise else: - remove, rmrefs = st.remove_failed(se) + remove = st.remove_failed(se) entity_was_treated = True continue @@ -413,6 +413,9 @@ class Crawler(object): se.identifiable)) remote_id = None if identified_record is not None: + if se.registered_identifiable.get_property("name") is None: + print("FOUND") + print(se) remote_id = identified_record.id st.set_id_of_node(se, remote_id) entity_was_treated = True diff --git a/src/caoscrawler/identifiable_adapters.py b/src/caoscrawler/identifiable_adapters.py index 7d20b784add4edaa41516e8c5654fd679e339137..13ff9b1c516a40b43419148766982f862f15f503 100644 --- a/src/caoscrawler/identifiable_adapters.py +++ b/src/caoscrawler/identifiable_adapters.py @@ -390,6 +390,8 @@ startswith: bool, optional def referencing_entity_has_appropriate_type(parents, register_identifiable): if register_identifiable.get_property("is_referenced_by") is None: return False + if register_identifiable.get_property("is_referenced_by").value is None: + return False appropriate_types = [el.lower() for el in register_identifiable.get_property("is_referenced_by").value] if "*" in appropriate_types: diff --git a/src/caoscrawler/sync_graph.py b/src/caoscrawler/sync_graph.py index 0bd038abe6884fe93215bfec6fb12b25882e03ad..468fab51a774944f06b94fb82c9f5e29ccf02132 100644 --- a/src/caoscrawler/sync_graph.py +++ b/src/caoscrawler/sync_graph.py @@ -141,7 +141,7 @@ class SyncGraph(): else: remove = self.remove_failed(node) logger.error(exc) - logger.error(f"{len(remove)}, {len(rmrefs)}") + logger.error(f"{len(remove)}") for node in list(self.nodes): try: identifiable = self.identifiableAdapter.get_identifiable( @@ -201,6 +201,7 @@ class SyncGraph(): for referencing in self.backward_references[node.uuid]: if referencing.id > 0: # check whether the node can be associated with an existing one + logger.error(str(referencing)) raise NotImplementedError('update of existing non ident') break # if we reach this, there is no existing entity, thus we can just insert node @@ -210,14 +211,6 @@ class SyncGraph(): entities = [] node_map = {} self.deal_with_non_ident() - for k, el in self.forward_references.items(): - print(k, "references") - for e in el: - print(e.uuid) - print('UNCHECKED ENTITIES') - for el in self.unchecked: - print(el.uuid) - print(el) for el in self.nodes: try: el.export_entity() @@ -227,7 +220,7 @@ class SyncGraph(): else: remove = self.remove_failed(el) logger.error(exc) - logger.error(f"{len(remove)}, {len(rmrefs)}") + logger.error(f"{len(remove)}") for el in self.nodes: try: entities.append(el.export_entity()) @@ -238,7 +231,7 @@ class SyncGraph(): else: remove = self.remove_failed(el) logger.error(exc) - logger.error(f"{len(remove)}, {len(rmrefs)}") + logger.error(f"{len(remove)}") if len(self.unchecked) > 1: self.unchecked_contains_circular_dependency() @@ -287,7 +280,7 @@ class SyncGraph(): if referenced in self.unchecked: if referenced in circle: closed = True - circle.append(pval) # FIXME + circle.append(referenced) added_to_circle = True if not added_to_circle: return None @@ -403,7 +396,7 @@ class SyncGraph(): self.forward_id_referenced_by[other.uuid].remove(node) for el in remove: - rm, rf = self.remove_failed(el) + rm = self.remove_failed(el) remove.extend(rm) return remove diff --git a/tox.ini b/tox.ini index e587774323171dda590c4e9198e049c1ed0e0e14..1b308b34ef15da4cfed8410eb75ca8669aa3bc20 100644 --- a/tox.ini +++ b/tox.ini @@ -18,4 +18,5 @@ max-line-length = 100 [pytest] testpaths = unittests -xfail_strict = True \ No newline at end of file +xfail_strict = True +pdbcls = IPython.terminal.debugger:Pdb