Skip to content
Snippets Groups Projects
Commit ac647773 authored by Henrik tom Wörden's avatar Henrik tom Wörden
Browse files

Minor Fixes

- use logger instead of print
- switch args to be backwards compatible
- use correct exception
parent 4421e086
No related branches found
No related tags found
No related merge requests found
......@@ -50,18 +50,21 @@ def access(path):
if __name__ == "__main__":
set_log_level(logging.DEBUG)
logger = logging.getLogger("caosadvancedtools")
conlogger = logging.getLogger("connection")
conlogger.setLevel(level=logging.ERROR)
logger.setLevel(level=logging.WARN)
parser = get_parser()
args = parser.parse_args()
print("Starting query...")
logger.info("Starting query...")
files = Crawler.query_files(args.path)
print("Query done...")
logger.info("Query done...")
config = db.configuration.get_config()
c = Crawler(use_cache=True, access=access,
food=[ProjectCFood,
ExperimentCFood, AnalysisCFood,
PublicationCFood, SimulationCFood,
])
c.crawl(files, interactive=False, security_level=UPDATE)
c.crawl(files, interactive=False, security_level=INSERT, hideKnown=True)
......@@ -38,6 +38,7 @@ import logging
import re
import caosdb as db
from caosdb.exceptions import EntityDoesNotExist
from .guard import global_guard as guard
......@@ -77,7 +78,7 @@ class AbstractCFood(object):
A function that takes a CaosDB path and returns a local path
"""
self.access = access
self.crawled_file = None
self._crawled_file = None
self.crawled_path = crawled_file
self.match = type(self).match(crawled_file)
self.to_be_updated = db.Container()
......@@ -85,6 +86,25 @@ class AbstractCFood(object):
self.attached_ones = []
self.attached_filenames = []
@property
def crawled_file(self):
if self._crawled_file is None:
try:
q = "FIND FILE WHICH IS STORED AT '{}'".format(
self.crawled_path)
self._crawled_file = db.execute_query(q, unique=True)
except EntityDoesNotExist:
path = "**"
if not self.crawled_path.startswith("/"):
path = path + "/"
q = "FIND FILE WHICH IS STORED AT '{}{}'".format(path,
self.crawled_path)
logger.debug(q)
self._crawled_file = db.execute_query(q, unique=True)
return self._crawled_file
def collect_information(self):
""" The CFood collects information for further processing.
......
......@@ -37,6 +37,7 @@ match. This occurs in basically three steps:
import logging
import os
import traceback
from datetime import datetime
......@@ -55,11 +56,15 @@ def separated(text):
class UnknownCache(object):
def __init__(self, interactive=False):
def __init__(self, interactive=False, load=False):
if interactive and "y" == input(
"\nDo you want to load filenames that previously were not "
"matched by any CFood?\nIn that case, they will not show up "
"again. (y)"):
load = True
if load and os.path.exists("known_cache.db"):
with open("known_cache.db") as fi:
self.filenames = [el.strip("\n") for el in fi.readlines()]
else:
......@@ -99,7 +104,7 @@ class Crawler(object):
if self.use_cache:
self.cache = Cache()
def match(self, files, interactive):
def match(self, files, interactive, hideKnown=False):
errors_occured = False
tbs = []
cfoods = []
......@@ -151,12 +156,12 @@ class Crawler(object):
# possibly load previously encountered "Missing matches" and
# "Multiple matches"
ucache = UnknownCache(interactive=interactive)
ucache = UnknownCache(interactive=interactive, load=hideKnown)
for crawled_file in files:
if len(matches[crawled_file]) == 0:
msg = ("ATTENTION: No matching cfood!\n"
"Tried to match {}".format(crawled_file))
"Tried to match {}\n".format(crawled_file))
if crawled_file in ucache.filenames:
logger.debug(msg)
......@@ -165,28 +170,30 @@ class Crawler(object):
ucache.add(crawled_file)
if len(matches[crawled_file]) > 1:
msg = ("Attention: More than one matching cfood!"
msg = ("Attention: More than one matching cfood!\n"
+ "Tried to match {}\n".format(crawled_file)
+ "\tRecordTypes:\t" + ", ".join(
matches[crawled_file]))
matches[crawled_file])+"\n")
if crawled_file in ucache.filenames:
logger.debug(msg)
else:
logger.warn(msg)
ucache.add(crawled_file)
ucache.add(crawled_file)
# Save the encountered problem matches
ucache.save()
return cfoods, matches, tbs, errors_occured
def crawl(self, files, interactive=True, security_level=RETRIEVE):
def crawl(self, files, interactive=True, hideKnown=False,
security_level=RETRIEVE):
guard.set_level(level=security_level)
files = sorted([f.path for f in files])
cfoods, matches, tbs, errors_occured = self.match(files, interactive)
cfoods, matches, tbs, errors_occured = self.match(files, interactive,
hideKnown=hideKnown)
if interactive and "y" != input("Do you want to continue? (y)"):
return
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment