Skip to content
Snippets Groups Projects
Commit f5267a4a authored by Henrik tom Wörden
Browse files

Merge branch 'mr-comments'

parents bafbc477 f4f31e11
No related branches found
No related tags found
No related merge requests found
......@@ -38,22 +38,28 @@ ENTITIES = {}
def get_value(prop):
"""returns the value of a Property
""" Returns the value of a Property
Parameters
----------
prop : The property of which the value shall be returned.
Returns
-------
out : The value of the property; if the value is an entity, its ID.
If the value is an Entity itself, its id is returned instead
"""
if not isinstance(prop.value, db.Entity):
return prop.value
else:
if isinstance(prop.value, db.Entity):
return prop.value.id
else:
return prop.value
def get_entity(name):
"""implements a mini cache of RecordTypes
""" Returns the entity with a given name, preferably from a local cache.
If and only if a RecordType (identified by its name) is not in the
dictionary, it is retrieved from CaosDB.
If the local cache does not contain the entity, retrieve it from CaosDB.
"""
if name not in ENTITIES:
......@@ -69,7 +75,23 @@ class AbstractCFood(object):
# a match
# instances shall be used to keep track of a match; i.e. entities can be
# object variable
def __init__(self, pattern, use_cache=False, access=lambda x: x):
"""Abstract base class for Crawler food (CFood).
Parameters
----------
pattern : str
The regex pattern for matching against file names.
use_cache : bool, optional
Whether to use caching (i.e. skip re-inserting objects that probably
already exist in CaosDB); defaults to False.
access : callable, optional
Only used by child classes?
"""
self.pattern = re.compile(pattern)
self.use_cache = use_cache
self.access = access
......@@ -85,8 +107,8 @@ class AbstractCFood(object):
for key, identifiable in entities.items():
if identifiable is None:
print("THIS IS STRANGE")
print("THIS IS STRANGE. No identifiables found in {}.".format(
crawled_file))
continue
existing = None
......@@ -175,7 +197,7 @@ class AbstractCFood(object):
if entity.name is None:
# TODO multiple parents are ignored! Sufficient?
query_string = "FIND Record " + entity.get_parents()[0].name
query_string += " with " + " and ".join(
query_string += " WITH " + " AND ".join(
["'" + p.name + "'='"
+ str(get_value(p)) + "'" for p in entity.get_properties()])
else:
......@@ -193,7 +215,7 @@ class AbstractCFood(object):
if r is not None:
print("Found Entity with id:", r.id)
else:
print("Did not find an existing entity")
print("Did not find an existing entity.")
return r
......
......@@ -54,7 +54,7 @@ class Crawler(object):
(path if path.endswith("/") else path + "/") + "**"
print("FILES QUERY: " + query_str)
files = db.execute_query(query_str)
print(str(len(files)) + " FILES TO BE PROCESSED.")
print("{} FILES TO BE PROCESSED.".format(len(files)))
return files
......
......@@ -55,11 +55,7 @@ def _clean_header(header):
class NoValidHeader(Exception):
def __init__(self, filename, *args, **kwargs):
msg = ("Header missing in {}\nFix this with the modify subcommand "
"using -f option".format(filename))
super().__init__(msg, *args, **kwargs)
pass
def get_header(filename):
"""Open an md file identified by filename and read out the yaml
......
......@@ -23,10 +23,13 @@
# ** end header
#
import caosdb as db
from caosadvancedtools.cfood import AbstractCFood
class ExampleCFood(AbstractCFood):
def create_identifiables(self, crawled_file, match):
print("create_identifiables")
entities = {}
entities["exp"] = db.Record()
#import IPython
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment