diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py index ed48c130c578734a1757eb59b6778814085a8bf4..0d1c5d631e7ba4bea61e6c437f6c125b1a55125a 100644 --- a/src/caoscrawler/converters.py +++ b/src/caoscrawler/converters.py @@ -329,9 +329,7 @@ class Converter(object, metaclass=ABCMeta): return converter - def create_values(self, - values: GeneralStore, - element: StructureElement): + def create_values(self, values: GeneralStore, element: StructureElement): """ Extract information from the structure element and store them as values in the general store. @@ -350,13 +348,14 @@ class Converter(object, metaclass=ABCMeta): element: StructureElement): pass - def create_records(self, values: GeneralStore, - records: RecordStore, + def create_records(self, values: GeneralStore, records: RecordStore, element: StructureElement): + # TODO why is element passed but not used??? if "records" not in self.definition: return [] + # TODO please rename due to conflict return create_records(values, records, self.definition["records"]) diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index fe6c63ff7af86a76fd6649323c96b8977b9fcc2f..ef06ab15777be7405c924860d8d0ab0cc32ff3bc 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -303,8 +303,7 @@ class Crawler(object): # Validator is given by a path if not value.startswith('/'): # Not an absolute path - definition[key] = os.path.join( - os.path.dirname(definition_path), value) + definition[key] = os.path.join(os.path.dirname(definition_path), value) if not os.path.isfile(definition[key]): # TODO(henrik) capture this in `crawler_main` similar to # `ConverterValidationError`. @@ -312,8 +311,7 @@ class Crawler(object): f"Couldn't find validation file {definition[key]}") elif isinstance(value, dict): # Recursively resolve all validators - definition[key] = self._resolve_validator_paths( - value, definition_path) + definition[key] = self._resolve_validator_paths(value, definition_path) return definition @@ -898,16 +896,17 @@ class Crawler(object): because some changes in parents (e.g. of Files) might fail if they are not updated first. """ + logger.debug("=== Going to execute parent updates ===") Crawler.set_ids_and_datatype_of_parents_and_properties(to_be_updated) parent_updates = db.Container() - for record in to_be_updated: - old_entity = Crawler._get_entity_by_id(record.id) + for entity in to_be_updated: + old_entity = Crawler._get_entity_by_id(entity.id) # Check whether the parents have been changed and add them if missing # in the old entity: changes_made = False - for parent in record.parents: + for parent in entity.parents: found = False for old_parent in old_entity.parents: if old_parent.id == parent.id: @@ -1156,7 +1155,7 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3])) generalStore_copy = generalStore.create_scoped_copy() recordStore_copy = recordStore.create_scoped_copy() - # Create an entry for this matched structure element: + # Create an entry for this matched structure element that contains the path: generalStore_copy[converter.name] = ( os.path.join(*(structure_elements_path + [element.get_name()])))