diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py
index 1962737dddbe71869846bcd40ecd8b0905ef0907..80a3728ce5b1f413d2bdd674b26a7dca1122eef5 100644
--- a/src/caoscrawler/converters.py
+++ b/src/caoscrawler/converters.py
@@ -346,9 +346,7 @@ class Converter(object, metaclass=ABCMeta):
 
         return converter
 
-    def create_values(self,
-                      values: GeneralStore,
-                      element: StructureElement):
+    def create_values(self, values: GeneralStore, element: StructureElement):
         """
         Extract information from the structure element and store them as values in the
         general store.
@@ -367,13 +365,14 @@ class Converter(object, metaclass=ABCMeta):
                         element: StructureElement):
         pass
 
-    def create_records(self, values: GeneralStore,
-                       records: RecordStore,
+    def create_records(self, values: GeneralStore, records: RecordStore,
                        element: StructureElement):
+        # TODO why is element passed but not used???
 
         if "records" not in self.definition:
             return []
 
+        # TODO please rename due to conflict
         return create_records(values, records,
                               self.definition["records"])
 
diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py
index 03a1c314a528af4b3802fda1269a36656d995624..c77dcee1f29eac69732ce353e0271761eca2df13 100644
--- a/src/caoscrawler/crawl.py
+++ b/src/caoscrawler/crawl.py
@@ -304,8 +304,7 @@ class Crawler(object):
                 # Validator is given by a path
                 if not value.startswith('/'):
                     # Not an absolute path
-                    definition[key] = os.path.join(
-                        os.path.dirname(definition_path), value)
+                    definition[key] = os.path.join(os.path.dirname(definition_path), value)
                     if not os.path.isfile(definition[key]):
                         # TODO(henrik) capture this in `crawler_main` similar to
                         # `ConverterValidationError`.
@@ -313,8 +312,7 @@ class Crawler(object):
                             f"Couldn't find validation file {definition[key]}")
             elif isinstance(value, dict):
                 # Recursively resolve all validators
-                definition[key] = self._resolve_validator_paths(
-                    value, definition_path)
+                definition[key] = self._resolve_validator_paths(value, definition_path)
 
         return definition
 
@@ -899,16 +897,17 @@ class Crawler(object):
 
         because some changes in parents (e.g. of Files) might fail if they are not updated first.
         """
+        logger.debug("=== Going to execute parent updates ===")
         Crawler.set_ids_and_datatype_of_parents_and_properties(to_be_updated)
         parent_updates = db.Container()
 
-        for record in to_be_updated:
-            old_entity = Crawler._get_entity_by_id(record.id)
+        for entity in to_be_updated:
+            old_entity = Crawler._get_entity_by_id(entity.id)
 
             # Check whether the parents have been changed and add them if missing
             # in the old entity:
             changes_made = False
-            for parent in record.parents:
+            for parent in entity.parents:
                 found = False
                 for old_parent in old_entity.parents:
                     if old_parent.id == parent.id:
@@ -1181,7 +1180,7 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
             generalStore_copy = generalStore.create_scoped_copy()
             recordStore_copy = recordStore.create_scoped_copy()
 
-            # Create an entry for this matched structure element:
+            # Create an entry for this matched structure element that contains the path:
             generalStore_copy[converter.name] = (
                 os.path.join(*(structure_elements_path + [element.get_name()])))
 