Skip to content
Snippets Groups Projects
Commit 5eeb0924 authored by Henrik tom Wörden
Browse files

ENH: separated collecting information from initialisation

parent 6f2ab3a3
Branches
Tags
No related merge requests found
......@@ -82,6 +82,19 @@ class AbstractCFood(object):
self.identifiables = db.Container()
self.verbosity = verbosity
self.attached_ones = []
self.attached_filenames = []
def collect_information(self):
""" Collect the information this CFood needs for further processing.
CFoods often need information from files or even from the database in
order to make processing decisions. This function is intended to be
called after match, so that match can be used without connecting to the
database.
The base implementation does nothing; to be overridden by subclasses.
"""
pass
@staticmethod
def get_re():
......@@ -175,16 +188,19 @@ class AbstractCFood(object):
def looking_for(self, crawled_file):
"""
Return True if crawled_file can be added to this CFood.
Typically a CFood exists for a file and defines how to deal with that
file. However, sometimes additional files "belong" to a CFood that was
created for another file. E.g. the data from an experiment should always
contain a labnotes scan: an experiment CFood might match against a
README file, but labnotes.txt shall also be treated by that CFood (and
not by a special CFood created for labnotes.txt).
This function can be used to define which files shall be 'attached'; a
subclass CFood for the experiment could be 'looking_for' the labnotes
using this function.
The base implementation returns True exactly when crawled_file is
contained in self.attached_filenames. Can be overridden by subclasses.
"""
if crawled_file in self.attached_filenames:
return True
return False
@staticmethod
......
......@@ -114,6 +114,13 @@ class Crawler(object):
errors_occured = True
tbs.append(e)
if self.verbosity >= INFO:
print("-"*60)
print("CFoods are collecting information...")
for cfood in cfoods:
cfood.collect_information()
if self.verbosity >= INFO:
print("-"*60)
print("Trying to attach files to created CFoods")
......@@ -149,7 +156,7 @@ class Crawler(object):
def crawl(self, files, interactive=True, security_level=RETRIEVE):
guard.set_level(level=security_level)
files = sorted(files, key=lambda x: x.path)
files = sorted([f.path for f in files])
cfoods, matches, tbs, errors_occured = self.match(files)
......
......@@ -82,12 +82,7 @@ def find_file_included_by(glob):
query_string = "FIND file which is stored at {}".format(glob)
try:
return db.execute_query(query_string)
except Exception as e:
print(e)
return []
def assure_absolute_path_in_glob(glob, prefix):
......
......@@ -76,10 +76,8 @@ class InsertionTest(unittest.TestCase):
class ExampleTest(unittest.TestCase):
def test(self):
dummy_file = db.File(path="/data/rabbit/2019-03-03/README.md")
print(dummy_file.path)
print(ExampleCFood.get_re())
cf = ExampleCFood(crawled_file=dummy_file)
self.assertIsNotNone(ExampleCFood.match(dummy_file.path))
path = "/data/rabbit/2019-03-03/README.md"
cf = ExampleCFood(crawled_file=path)
self.assertIsNotNone(ExampleCFood.match(path))
self.assertEqual(cf.match.group('species'), 'rabbit')
self.assertEqual(cf.match.group('date'), '2019-03-03')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment