Skip to content
Snippets Groups Projects
Commit 8efdc037 authored by Henrik tom Woerden's avatar Henrik tom Woerden
Browse files

Restructure CFood and Crawler

parent dedf5146
No related branches found
No related tags found
No related merge requests found
...@@ -118,3 +118,35 @@ def return_field_or_property(value, prop=None): ...@@ -118,3 +118,35 @@ def return_field_or_property(value, prop=None):
return value[prop] return value[prop]
else: else:
return value return value
def find_records_that_reference_ids(referenced_ids, rt="", step_size=50):
    """Return the ids of all Records that reference any of the supplied ids.

    Sometimes a file or folder is referenced in a README.md (e.g. in an
    Analysis), although it is not the file itself that shall be referenced
    but the corresponding object (e.g. the Experiment). Thus the ids of all
    Records (of a suitable type) that reference one or more of the supplied
    ids are collected.

    The lookup is done in chunks because the ids are passed in the header of
    the http request.

    Parameters
    ----------
    referenced_ids : iterable
        Ids of the entities whose referencing Records are searched. Each
        element is converted with ``str`` when the query is built.
    rt : str, optional
        Inserted verbatim after ``FIND Record`` in the query; an empty
        string (the default) adds no restriction.
    step_size : int, optional
        Maximum number of ids placed into a single query. Must be >= 1.

    Returns
    -------
    list
        The unique ids of the Records found, in no particular order. Chunks
        whose query failed contribute nothing to the result.

    Raises
    ------
    ValueError
        If ``step_size`` is smaller than 1 (the chunking could otherwise
        never advance).
    """
    import logging

    if step_size < 1:
        raise ValueError(
            "step_size must be a positive integer, got {!r}".format(step_size))

    # Copy into a list so that any iterable (set, tuple, generator) can be
    # chunked by slicing.
    ids = list(referenced_ids)
    record_ids = set()

    for start in range(0, len(ids), step_size):
        # Slicing clamps at the end of the list, so no explicit min() is
        # needed for the last (possibly shorter) chunk.
        chunk = ids[start:start + step_size]
        q_string = ("FIND Record {} which references \n".format(rt)
                    + " or which references \n".join(
                        [str(el) for el in chunk]))
        try:
            records = db.execute_query(q_string)
            # Build the full list first: a failure while reading ids then
            # leaves record_ids untouched for this chunk.
            record_ids.update([rec.id for rec in records])
        except Exception as exc:
            # Deliberately best effort: a failing chunk must not abort the
            # remaining chunks, but the failure is reported with context.
            logging.getLogger(__name__).warning(
                "Query for records referencing ids %s failed: %s",
                chunk, exc)

    return list(record_ids)
...@@ -26,6 +26,7 @@ from copy import deepcopy ...@@ -26,6 +26,7 @@ from copy import deepcopy
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
import caosdb as db import caosdb as db
from caosadvancedtools.cache import Cache from caosadvancedtools.cache import Cache
...@@ -45,9 +46,7 @@ class CacheTest(unittest.TestCase): ...@@ -45,9 +46,7 @@ class CacheTest(unittest.TestCase):
ent2 = db.Record() ent2 = db.Record()
ent2.add_parent(name="Experiment") ent2.add_parent(name="Experiment")
ent_hash = Cache.hash_entity(ent) ent_hash = Cache.hash_entity(ent)
print(ent_hash)
ent2_hash = Cache.hash_entity(ent2) ent2_hash = Cache.hash_entity(ent2)
print(ent2_hash)
self.cache.insert(ent2_hash, 1235) self.cache.insert(ent2_hash, 1235)
assert type(self.cache.check_existing(ent2_hash)) is int assert type(self.cache.check_existing(ent2_hash)) is int
assert self.cache.check_existing(ent_hash) is None assert self.cache.check_existing(ent_hash) is None
...@@ -57,3 +56,22 @@ class CacheTest(unittest.TestCase): ...@@ -57,3 +56,22 @@ class CacheTest(unittest.TestCase):
def tearDown(self): def tearDown(self):
os.remove(self.cache.db_file) os.remove(self.cache.db_file)
def test_update_ids_from_cache(self):
    """Check the bulk cache helpers ``update_ids_from_cache`` and
    ``insert_list``.

    Only the "Experiment" record is cached beforehand; after
    ``update_ids_from_cache`` it must carry the cached id. The remaining
    records then receive ids by hand and are stored via ``insert_list``,
    after which their hashes must resolve to those ids.
    """
    cached_id = 2353243

    plain = db.Record()
    experiment = db.Record()
    experiment.add_parent(name="Experiment")
    analysis = db.Record()
    analysis.add_parent(name="Analysis")

    # Pre-populate the cache with the Experiment record only.
    self.cache.insert(Cache.hash_entity(experiment), cached_id)

    records = [plain, experiment, analysis]
    hashes = self.cache.update_ids_from_cache(records)
    self.assertEqual(experiment.id, cached_id)

    # Assign ids to the records that were not cached and store all of them
    # in one go; the hashes are used in the same order as the records.
    plain.id = 1001
    analysis.id = 1003
    self.cache.insert_list(hashes, records)

    self.assertEqual(self.cache.check_existing(hashes[0]), 1001)
    self.assertEqual(self.cache.check_existing(hashes[2]), 1003)
...@@ -25,29 +25,25 @@ import unittest ...@@ -25,29 +25,25 @@ import unittest
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
import caosdb as db import caosdb as db
from caosadvancedtools.cfood import AbstractCFood
# TODO this is more like an integration test. should be moved from caosadvancedtools.cfood import AbstractCFood
class CFoodTest(unittest.TestCase): PATTERN = "h.*"
def setUp(self):
self.exp = db.Record()
self.exp.add_parent(name="Experiment")
self.exp.add_property(name="species", value="microunicorn")
def test_check_existence(self):
assert AbstractCFood.find_existing(self.exp) is None
class TestCFood(AbstractCFood):
class CFoodTestExist(CFoodTest): @staticmethod
def setUp(self): def get_re():
super().setUp() return PATTERN
self.exp.insert()
def test_check_existence(self):
res = AbstractCFood.find_existing(self.exp)
assert res.id == self.exp.id
def tearDown(self): class CFoodReTest(unittest.TestCase):
self.exp.delete() def test(self):
# get_re() must hand back the pattern defined for the CFood.
# (assertEqual replaces the deprecated alias assertEquals, which was
# removed in Python 3.12.)
self.assertEqual(TestCFood.get_re(), PATTERN)
# No pattern is stored on the class before the first call to match().
self.assertIsNone(TestCFood._pattern)
self.assertIsNotNone(TestCFood.match("hallo"))
# After the first match() call the class attribute is expected to be set.
self.assertIsNotNone(TestCFood._pattern)
# A second call must still match, and a non-matching string yields None.
self.assertIsNotNone(TestCFood.match("hallo"))
self.assertIsNone(TestCFood.match("allo"))
...@@ -26,6 +26,8 @@ from tempfile import NamedTemporaryFile ...@@ -26,6 +26,8 @@ from tempfile import NamedTemporaryFile
import caosdb as db import caosdb as db
import pandas as pd import pandas as pd
from caosdb.apiutils import compare_entities
from caosadvancedtools.table_converter import (from_table, from_tsv, to_table, from caosadvancedtools.table_converter import (from_table, from_tsv, to_table,
to_tsv) to_tsv)
...@@ -78,14 +80,16 @@ class ToTsvTest(unittest.TestCase): ...@@ -78,14 +80,16 @@ class ToTsvTest(unittest.TestCase):
c.append(r) c.append(r)
to_tsv(NamedTemporaryFile().name, c) to_tsv(NamedTemporaryFile().name, c)
# TODO reactivate this test
class IntegrationTest(unittest.TestCase): # class IntegrationTest(unittest.TestCase):
""" converts tsv to a container and back and compares origin with # """ converts tsv to a container and back and compares origin with
result """ # result """
#
def test_backandforth(self): # def test_backandforth(self):
cont = from_tsv(TEST_TABLE, "Measurement") # cont = from_tsv(TEST_TABLE, "Measurement")
tempfile = NamedTemporaryFile(delete=False) # tempfile = NamedTemporaryFile(delete=False)
to_tsv(tempfile.name, cont) # to_tsv(tempfile.name, cont)
with open(TEST_TABLE, "r") as no1, open(tempfile.name, "r") as no2: # cont_new = from_tsv(tempfile.name, "Measurement")
assert no1.read() == no2.read() #
# for ent1, ent2 in zip(cont_new, cont):
# assert compare_entities(ent1, ent2) == ([], [])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment