diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py
index 39a6bf2b987cb68ffffd359923c255c8af54839f..97b81cd2e28faf2310e84abd5bb98aba9b60b308 100644
--- a/src/caoscrawler/converters.py
+++ b/src/caoscrawler/converters.py
@@ -283,6 +283,10 @@ class Converter(object, metaclass=ABCMeta):


 def converter_factory(definition: dict, name: str, converter_registry: dict):
+    """Create a Converter instance of the appropriate class.
+
+    The `type` key in `definition` determines the Converter class to be used.
+    """
     if "type" not in definition:
         raise RuntimeError(
diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index 0704cee918f771bb01ec624b01aa529821a29edc..7c58656d99efe06e5713571260535e8da52fbc3d 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -376,9 +376,13 @@ class Crawler(object):
                                  converter_registry)

     @staticmethod
-    def create_local_converters(crawler_definition: dict,
-                                converter_registry: dict):
-        local_converters = []
+    def initialize_converters(crawler_definition: dict, converter_registry: dict):
+        """
+        Take the cfood as a dict (`crawler_definition`) and create the Converter objects
+        that are defined at the top level. Child Converters are in turn created during
+        the initialization of their parent Converters.
+        """
+        converters = []

         for key, value in crawler_definition.items():
             # Definitions and Converters are reserved keywords
@@ -390,10 +394,10 @@ class Crawler(object):
                 continue
             elif key == "Converters":
                 continue
-            local_converters.append(Converter.converter_factory(
+            converters.append(Converter.converter_factory(
                 value, key, converter_registry))

-        return local_converters
+        return converters

     def start_crawling(self, items: Union[List[StructureElement], StructureElement],
                        crawler_definition: dict,
@@ -425,8 +429,7 @@ class Crawler(object):
             items = [items]
         self.run_id = uuid.uuid1()
-        local_converters = Crawler.create_local_converters(crawler_definition,
-                                                           converter_registry)
+        local_converters = Crawler.initialize_converters(crawler_definition, converter_registry)

         # This recursive crawling procedure generates the update list:
         self.target_data: List[db.Record] = []
         self._crawl(items,
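For readers of this diff: below is a minimal, self-contained sketch of the behaviour the renamed method documents, i.e. that only top-level cfood entries become converters, that reserved keywords are skipped, and that the `type` key selects the Converter class. The `DirectoryConverter` stand-in, the registry layout mapping type names directly to classes, the error message, and the example cfood are all illustrative assumptions, not the library's actual API.

# Illustrative sketch only; mirrors the dispatch logic in the diff above,
# but all names and the registry layout are assumptions for demonstration.

RESERVED_KEYWORDS = ("Definitions", "Converters")


class DirectoryConverter:  # stand-in for a real Converter subclass
    def __init__(self, definition: dict, name: str, converter_registry: dict):
        self.definition = definition
        self.name = name
        # A real Converter would create its child converters here,
        # recursing into definition.get("subtree", {}).


# Assumed registry shape: type name -> Converter class.
converter_registry = {"Directory": DirectoryConverter}


def converter_factory(definition: dict, name: str, converter_registry: dict):
    # The "type" key selects the Converter class, as documented above.
    if "type" not in definition:
        raise RuntimeError(f"Converter definition {name!r} lacks a 'type' key.")
    return converter_registry[definition["type"]](definition, name, converter_registry)


def initialize_converters(crawler_definition: dict, converter_registry: dict):
    # Only top-level entries become converters; reserved keywords are skipped.
    return [
        converter_factory(value, key, converter_registry)
        for key, value in crawler_definition.items()
        if key not in RESERVED_KEYWORDS
    ]


crawler_definition = {
    "Definitions": {},                   # reserved keyword, skipped
    "DataDir": {"type": "Directory",     # dispatched to DirectoryConverter
                "match": "data",
                "subtree": {}},
}

converters = initialize_converters(crawler_definition, converter_registry)
assert type(converters[0]) is DirectoryConverter

This also illustrates why the rename makes sense: the method does not merely collect "local" converters, it initializes the top level of the converter tree, and each Converter recursively initializes its own children from its `subtree`.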