Commit b325d6fb authored by Florian Spreckelsen

Merge branch 'f-refactor-main' into 'dev'

F refactor main

See merge request caosdb/src/crawler2.0!13
parents 54bf5162 25f76a10
@@ -893,15 +893,21 @@ class Crawler(object):
         return self.updateList
 
 
-def main():
-    args = parse_args()
-    crawler = Crawler(debug=args.debug)
-    crawler.crawl_directory(args.path, args.cfood)
-    if args.provenance is not None:
-        crawler.save_debug_data(args.provenance)
-
-    if args.load_identifiables is not None:
-        with open(args.load_identifiables, "r") as f:
+def crawler_main(args_path,
+                 args_cfood,
+                 args_load_identifiables,
+                 args_debug,
+                 args_provenance,
+                 args_dry_sync,
+                 args_sync,
+                 args_prefix):
+    crawler = Crawler(debug=args_debug)
+    crawler.crawl_directory(args_path, args_cfood)
+    if args_provenance is not None:
+        crawler.save_debug_data(args_provenance)
+
+    if args_load_identifiables is not None:
+        with open(args_load_identifiables, "r") as f:
             identifiable_data = yaml.safe_load(f)
 
     ident = CaosDBIdentifiableAdapter()
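This hunk is the core of the refactor: the argparse-bound main() becomes crawler_main(), a plain function that takes its former command-line options as explicit parameters, so a crawl can be started programmatically. A minimal sketch of such a call, assuming crawler_main is importable; the import path and all argument values below are illustrative, not part of the diff:

    from crawl import crawler_main  # hypothetical import path

    # Dry-run a crawl without committing changes; keyword names match
    # the new signature shown above.
    crawler_main(args_path="/data/run1",
                 args_cfood="cfood.yml",
                 args_load_identifiables=None,
                 args_debug=False,
                 args_provenance=None,
                 args_dry_sync=True,
                 args_sync=False,
                 args_prefix="")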
@@ -914,7 +920,7 @@ def main():
             rt.add_property(name=pn)
         ident.register_identifiable(k, rt)
 
-    if args.dry_sync:
+    if args_dry_sync:
         ins, upd = crawler.synchronize(commit_changes=False)
         inserts = [str(i) for i in ins]
         updates = [str(i) for i in upd]
@@ -922,12 +928,14 @@ def main():
             f.write(yaml.dump({
                 "insert": inserts,
                 "update": updates}))
-    elif args.sync:
+    elif args_sync:
         rtsfinder = dict()
         for elem in crawler.updateList:
             if isinstance(elem, db.File):
                 # correct the file path:
                 # elem.file = os.path.join(args.path, elem.file)
+                if elem.path.startswith(args_prefix):
+                    elem.path = elem.path[len(args_prefix):]
                 elem.file = None
                 # TODO: as long as the new file backend is not finished
                 # we are using the loadFiles function to insert symlinks.
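The two added lines implement the new prefix handling: a plain string comparison and slice on the stored path, with no path normalization. A standalone sketch of that behavior follows (function name and sample paths are hypothetical). Note that no default for --prefix is visible in the diff, so args_prefix would be None when -p is omitted, and path.startswith(None) would raise a TypeError in this branch:

    def strip_prefix(path, prefix):
        # Same logic as the diff: plain startswith/slice with no
        # normalization, so "/data" and "/data/" behave differently.
        if path.startswith(prefix):
            return path[len(prefix):]
        return path

    assert strip_prefix("/data/run1/f.dat", "/data") == "/run1/f.dat"
    assert strip_prefix("/other/f.dat", "/data") == "/other/f.dat"  # unchanged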
@@ -982,8 +990,24 @@ def parse_args():
                         help="Do the synchronization. This is probably the expected "
                              "standard behavior of the crawler.")
+    parser.add_argument("-p", "--prefix",
+                        help="Remove the given prefix from the paths "
+                             "of all file objects.")
 
     return parser.parse_args()
 
 
+def main():
+    args = parse_args()
+    return crawler_main(
+        args.path,
+        args.cfood,
+        args.load_identifiables,
+        args.debug,
+        args.provenance,
+        args.dry_sync,
+        args.sync,
+        args.prefix
+    )
+
+
 if __name__ == "__main__":
     sys.exit(main())
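For completeness, a hypothetical command line exercising the new option; the script name is assumed, path and cfood are assumed to be the two positional arguments, and only the -p/--prefix flag and its help text are confirmed by the diff above:

    python crawl.py /data/run1 cfood.yml --sync --prefix /data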