Skip to content
Snippets Groups Projects

MAINT: allow starting the crawler with generic structure elements

Merged Henrik tom Wörden requested to merge f-generic-struct into dev
1 file
+ 6
3
Compare changes
  • Side-by-side
  • Inline
+ 6
3
@@ -286,13 +286,13 @@ class Crawler(object):
return local_converters
def start_crawling(self, item: StructureElement,
def start_crawling(self, items,
crawler_definition: dict,
converter_registry: dict):
"""
Start point of the crawler recursion.
item: A structure element that is used for generating the initial items for the crawler.
items: A list of structure elements that is used for generating the initial items for the crawler.
This could e.g. be a Directory.
crawler_definition: A dictionary representing the crawler definition, possibly from a yaml
file.
@@ -305,11 +305,14 @@ class Crawler(object):
if self.generalStore is None:
raise RuntimeError("Should not happen.")
if not isinstance(items, list):
items = [items]
local_converters = Crawler.create_local_converters(crawler_definition,
converter_registry)
# This recursive crawling procedure generates the update list:
self.updateList: list[db.Record] = []
self._crawl([item],
self._crawl(items,
self.global_converters, local_converters, self.generalStore, self.recordStore,
[], [])
Loading