From 96293ef28bf0f670f77249e7b04dbc4477827c04 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20tom=20W=C3=B6rden?= <h.tomwoerden@indiscale.com>
Date: Fri, 30 Sep 2022 13:52:30 +0200
Subject: [PATCH] MAINT: doc string and rename of create_local_converters

---
 src/caoscrawler/converters.py |  4 ++++
 src/caoscrawler/crawl.py      | 17 ++++++++++-------
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py
index 39a6bf2b..97b81cd2 100644
--- a/src/caoscrawler/converters.py
+++ b/src/caoscrawler/converters.py
@@ -283,6 +283,10 @@ class Converter(object, metaclass=ABCMeta):
 
 
 def converter_factory(definition: dict, name: str, converter_registry: dict):
+    """Create a Converter instance of the appropriate class.
+
+    The `type` key in `definition` determines which Converter class is used.
+    """
 
     if "type" not in definition:
         raise RuntimeError(
diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index 0704cee9..7c58656d 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -376,9 +376,13 @@ class Crawler(object):
                                              converter_registry)
 
     @staticmethod
-    def create_local_converters(crawler_definition: dict,
-                                converter_registry: dict):
-        local_converters = []
+    def initialize_converters(crawler_definition: dict, converter_registry: dict):
+        """
+        Take the cfood definition (`crawler_definition`) and create the converter
+        objects that are defined at the top level. Child Converters are in turn
+        created recursively during the initialization of their parents.
+        """
+        converters = []
 
         for key, value in crawler_definition.items():
             # Definitions and Converters are reserved keywords
@@ -390,10 +394,10 @@ class Crawler(object):
                 continue
             elif key == "Converters":
                 continue
-            local_converters.append(Converter.converter_factory(
+            converters.append(Converter.converter_factory(
                 value, key, converter_registry))
 
-        return local_converters
+        return converters
 
     def start_crawling(self, items: Union[List[StructureElement], StructureElement],
                        crawler_definition: dict,
@@ -425,8 +429,7 @@ class Crawler(object):
             items = [items]
 
         self.run_id = uuid.uuid1()
-        local_converters = Crawler.create_local_converters(crawler_definition,
-                                                           converter_registry)
+        local_converters = Crawler.initialize_converters(crawler_definition, converter_registry)
         # This recursive crawling procedure generates the update list:
         self.target_data: List[db.Record] = []
         self._crawl(items,
--
GitLab
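
A minimal usage sketch of the renamed method follows; the cfood layout and the
shape of the registry entries below are assumptions for illustration, not taken
from this patch:

    from caoscrawler.crawl import Crawler

    # Every top-level key that is not a reserved keyword (e.g. "Definitions",
    # "Converters") defines one converter; its "type" selects the class via
    # Converter.converter_factory().  "DataDir" is a hypothetical example name.
    crawler_definition = {
        "DataDir": {
            "type": "Directory",
            "match": "^data$",
        },
    }

    # Assumed registry shape: maps a "type" name to the class implementing it.
    converter_registry = {
        "Directory": {
            "converter": "DirectoryConverter",
            "package": "caoscrawler.converters",
        },
    }

    # Builds one Converter per non-reserved top-level key; child converters in
    # any "subtree" are created recursively by the Converter initializers.
    converters = Crawler.initialize_converters(crawler_definition, converter_registry)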