diff --git a/src/newcrawler/identifiable_adapters.py b/src/newcrawler/identifiable_adapters.py index 152a774e49b188a03f56d0ce43f4d84abf5de1d7..be725993da4b0300c8092148c980d150d6da0ee5 100644 --- a/src/newcrawler/identifiable_adapters.py +++ b/src/newcrawler/identifiable_adapters.py @@ -420,7 +420,7 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter): return record return record.id - def retrieve_identified_record(self, identifiable: db.Record): + def retrieve_identified_record_for_identifiable(self, identifiable: db.Record): query_string = self.create_query_for_identifiable(identifiable) candidates = db.execute_query(query_string) if len(candidates) > 1: diff --git a/src/newcrawler/identified_cache.py b/src/newcrawler/identified_cache.py index e02e19d86f8f262f984a6ae5b7a84675ef259581..eed5ab40aa5e707c37e34aafdf2f02285e7394e9 100644 --- a/src/newcrawler/identified_cache.py +++ b/src/newcrawler/identified_cache.py @@ -36,6 +36,15 @@ def _create_hashable_string(identifiable: db.Record): """ creates a string from the attributes of an identifiable that can be hashed """ + if identifiable.role == "File": + # Special treatment for files: + return "P<>N<>{}:{}".format("path", identifiable.path) + if len(identifiable.parents) != 1: + # TODO: extend this + # maybe something like this: + # parent_names = ",".join( + # sorted([p.name for p in identifiable.parents]) + raise RuntimeError("Cache entry can only be generated for entities with 1 parent.") rec_string = "P<{}>N<{}>".format(identifiable.parents[0].name, identifiable.name) for pname in sorted([p.name for p in identifiable.properties]): value = str(identifiable.get_property(pname).value)