From 5143f8b6b1ec39fea8da5df1afa743a576a057fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <henrik@trineo.org> Date: Mon, 3 Jun 2019 09:33:14 +0200 Subject: [PATCH] MAINT minor and comments --- src/caosadvancedtools/cfood.py | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/caosadvancedtools/cfood.py b/src/caosadvancedtools/cfood.py index 05cf6f1c..617a8c7a 100644 --- a/src/caosadvancedtools/cfood.py +++ b/src/caosadvancedtools/cfood.py @@ -38,13 +38,24 @@ ENTITIES = {} def get_value(prop): + """returns the value of a Property + + If the value is an Entity itself, its id is returned instead + """ + if not isinstance(prop.value, db.Entity): return prop.value else: - return str(prop.value.id) + return prop.value.id def get_entity(name): + """implements a mini cache of RecordTypes + + If and only if a RecordType (identified by its name) is not in the + dictionary, it is retrieved from CaosDB. + """ + if name not in ENTITIES: ent = db.Entity(name=name) ent.retrieve() @@ -63,6 +74,9 @@ class AbstractCFood(object): self.use_cache = use_cache self.access = access + if self.use_cache: + self.identifiable_cache = Cache() + def treat_match(self, crawled_file, match): print(crawled_file) @@ -81,8 +95,9 @@ class AbstractCFood(object): if self.use_cache: identifiable_cache = Cache() identifier = Cache.hash_entity(identifiable) - print(identifier) - cached_id = identifiable_cache.check_existing(identifier) + cached_id = self.identifiable_cache.check_existing(identifier) + + # retrieve entity for the cached id if cached_id is not None: existing = db.execute_query("FIND {}".format(cached_id), @@ -151,18 +166,25 @@ class AbstractCFood(object): @staticmethod def find_existing(entity): + """searches for an entity that matches the identifiable in CaosDB + + Characteristics of the identifiable like, properties, name or id are + used for the match. + """ + if entity.name is None: + # TODO multiple parents are ignored! Sufficient? query_string = "FIND Record " + entity.get_parents()[0].name - #import IPython - # IPython.embed() query_string += " with " + " and ".join( ["'" + p.name + "'='" - + get_value(p) + "'" for p in entity.get_properties()]) + + str(get_value(p)) + "'" for p in entity.get_properties()]) else: query_string = "FIND '{}'".format(entity.name) print(query_string) q = db.Query(query_string) + # the identifiable should identify an object uniquely. Thus the query + # is using the unique keyword try: r = q.execute(unique=True) except TransactionError as er: -- GitLab