Commit 1a963c81 authored by Alexander Schlemmer

Merge branch 'f-type-hint-minor' into 'dev'

MAINT: correct type hints

See merge request !66
parents d86d2603 eb4a11e9
2 merge requests: !71 REL: Release v0.2.0, !66 MAINT: correct type hints
Pipeline #30565 passed
@@ -28,6 +28,7 @@ Crawl a file structure using a yaml cfood definition and synchronize
 the acuired data with CaosDB.
 """
+from __future__ import annotations
 import importlib
 from caosadvancedtools.cache import UpdateCache, Cache
 import uuid
@@ -50,7 +51,7 @@ from .identifiable_adapters import (IdentifiableAdapter,
                                     LocalStorageIdentifiableAdapter,
                                     CaosDBIdentifiableAdapter)
 from collections import defaultdict
-from typing import Any, Dict, List, Optional, Type, Union
+from typing import Any, Optional, Type, Union
 from caosdb.apiutils import compare_entities, merge_entities
 from copy import deepcopy
 from jsonschema import validate
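The `from __future__ import annotations` line added above is what makes the lower-case built-in generics used throughout this diff (`list[dict]`, `dict[str, tuple]`, ...) legal on Python versions before 3.9: with PEP 563, annotations are stored as strings and never evaluated at runtime. A minimal, self-contained sketch (not taken from this repository) illustrating the idea:

```python
# Minimal sketch, independent of caoscrawler: with PEP 563 deferred
# annotations, built-in generics can be used in type hints even on
# Python 3.7/3.8, where dict[str, int] outside an annotation would raise
# "TypeError: 'type' object is not subscriptable".
from __future__ import annotations


def count_words(lines: list[str]) -> dict[str, int]:
    """Count word occurrences; the annotations are never evaluated at runtime."""
    counts: dict[str, int] = {}
    for line in lines:
        for word in line.split():
            counts[word] = counts.get(word, 0) + 1
    return counts


if __name__ == "__main__":
    print(count_words(["a b a", "c b"]))  # {'a': 2, 'b': 2, 'c': 1}
```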
@@ -168,7 +169,7 @@ class Crawler(object):
                  generalStore: Optional[GeneralStore] = None,
                  debug: bool = False,
                  identifiableAdapter: IdentifiableAdapter = None,
-                 securityMode: int = SecurityMode.UPDATE
+                 securityMode: SecurityMode = SecurityMode.UPDATE
                  ):
         """
         Create a new crawler and initialize an empty RecordStore and GeneralStore.
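Annotating `securityMode` with the enum type instead of `int` lets a static type checker reject plain integers and makes the accepted values self-documenting. The sketch below is a stand-in only; the real `SecurityMode` lives in caoscrawler and its members are not shown in this diff:

```python
# Illustrative stand-in enum, not the actual caoscrawler SecurityMode.
# Annotating the parameter with the enum type (instead of int) lets a
# type checker such as mypy flag calls that pass a bare integer.
from enum import Enum, auto


class SecurityMode(Enum):
    RETRIEVE = auto()
    INSERT = auto()
    UPDATE = auto()


def synchronize(securityMode: SecurityMode = SecurityMode.UPDATE) -> str:
    """Pretend to synchronize; only the annotation matters for this sketch."""
    return f"running with {securityMode.name}"


print(synchronize())        # running with UPDATE
# synchronize(2)            # mypy: Argument 1 has incompatible type "int"
```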
@@ -209,14 +210,14 @@ class Crawler(object):
         if identifiableAdapter is None:
             self.identifiableAdapter = LocalStorageIdentifiableAdapter()
         # If a directory is crawled this may hold the path to that directory
-        self.crawled_directory = None
+        self.crawled_directory: Optional[str] = None
         self.debug = debug
         if self.debug:
             # order in the tuple:
             # 0: generalStore
             # 1: recordStore
-            self.debug_tree: Dict[str, tuple] = dict()
-            self.debug_metadata: Dict[str, dict] = dict()
+            self.debug_tree: dict[str, tuple] = dict()
+            self.debug_metadata: dict[str, dict] = dict()
             self.debug_metadata["copied"] = dict()
             self.debug_metadata["provenance"] = defaultdict(lambda: dict())
             self.debug_metadata["usage"] = defaultdict(lambda: set())
@@ -236,7 +237,7 @@ class Crawler(object):
         return self._resolve_validator_paths(crawler_definition, crawler_definition_path)
-    def _load_definition_from_yaml_dict(self, crawler_definitions: List[Dict]):
+    def _load_definition_from_yaml_dict(self, crawler_definitions: list[dict]):
         """Load crawler definitions from a list of (yaml) dicts `crawler_definitions` which
         contains either one or two documents.
@@ -258,7 +259,7 @@ class Crawler(object):
         # tested in the next lines of code:
         # Load the cfood schema:
-        with open(files('caoscrawler').joinpath('cfood-schema.yml'), "r") as f:
+        with open(str(files('caoscrawler').joinpath('cfood-schema.yml')), "r") as f:
             schema = yaml.safe_load(f)
         # Add custom converters to converter enum in schema:
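The extra `str(...)` is needed because `files()` returns a `Traversable`, which `open()`'s type stubs do not accept; converting to `str` (or using the `Traversable`'s own `.open()`) satisfies the checker. A hedged sketch of the same pattern, assuming Python 3.9+ and that the caoscrawler package and PyYAML are installed (earlier Python versions would use the importlib_resources backport):

```python
# Sketch of the pattern used in the diff above.  files() returns a
# Traversable; wrapping it in str() (as the new line does) or calling its
# own .open() avoids type-checker complaints about open()'s argument type.
from importlib.resources import files  # Python 3.9+

import yaml  # PyYAML, assumed installed

schema_path = files("caoscrawler").joinpath("cfood-schema.yml")

with open(str(schema_path), "r") as f:      # pattern chosen in the diff
    schema = yaml.safe_load(f)

with schema_path.open("r") as f:            # equivalent alternative
    schema = yaml.safe_load(f)
```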
@@ -315,7 +316,7 @@ class Crawler(object):
         """
         # Defaults for the converter registry:
-        converter_registry: Dict[str, Dict[str, str]] = {
+        converter_registry: dict[str, dict[str, str]] = {
             "Directory": {
                 "converter": "DirectoryConverter",
                 "package": "caoscrawler.converters"},
@@ -430,7 +431,7 @@ class Crawler(object):
         return converters
-    def start_crawling(self, items: Union[List[StructureElement], StructureElement],
+    def start_crawling(self, items: Union[list[StructureElement], StructureElement],
                        crawler_definition: dict,
                        converter_registry: dict):
         """
@@ -462,8 +463,9 @@
         self.run_id = uuid.uuid1()
         local_converters = Crawler.initialize_converters(
             crawler_definition, converter_registry)
         # This recursive crawling procedure generates the update list:
-        self.crawled_data: List[db.Record] = []
+        self.crawled_data: list[db.Record] = []
         self._crawl(items, local_converters, self.generalStore,
                     self.recordStore, [], [])
@@ -501,7 +503,7 @@
         return False
     @staticmethod
-    def create_flat_list(ent_list: List[db.Entity], flat: List[db.Entity]):
+    def create_flat_list(ent_list: list[db.Entity], flat: list[db.Entity]):
         """
         Recursively adds all properties contained in entities from ent_list to
         the output list flat. Each element will only be added once to the list.
@@ -688,11 +690,11 @@ class Crawler(object):
                     if p.value is old:
                         p.value = new
-    def split_into_inserts_and_updates(self, ent_list: List[db.Entity]):
+    def split_into_inserts_and_updates(self, ent_list: list[db.Entity]):
         if self.identifiableAdapter is None:
             raise RuntimeError("Should not happen.")
-        to_be_inserted: List[db.Entity] = []
-        to_be_updated: List[db.Entity] = []
+        to_be_inserted: list[db.Entity] = []
+        to_be_updated: list[db.Entity] = []
         flat = list(ent_list)
         # assure all entities are direct members TODO Can this be removed at some point?Check only?
         Crawler.create_flat_list(ent_list, flat)
@@ -720,7 +722,7 @@
                 newrecord = self.get_from_any_cache(record)
                 merge_entities(newrecord, record)
                 Crawler.bend_references_to_new_object(
-                    old=record, new=newrecord, entities=flat+to_be_updated+to_be_inserted)
+                    old=record, new=newrecord, entities=flat + to_be_updated + to_be_inserted)
                 del flat[i]
                 resolved_references = True
...@@ -781,8 +783,8 @@ class Crawler(object): ...@@ -781,8 +783,8 @@ class Crawler(object):
@staticmethod @staticmethod
def _merge_properties_from_remote( def _merge_properties_from_remote(
crawled_data: List[db.Record], crawled_data: list[db.Record],
identified_records: List[db.Record] identified_records: list[db.Record]
): ):
"""Merge entity representation that was created by crawling the data with remotely found """Merge entity representation that was created by crawling the data with remotely found
identified records s.th. new properties and property values are updated correctly but identified records s.th. new properties and property values are updated correctly but
...@@ -823,8 +825,8 @@ class Crawler(object): ...@@ -823,8 +825,8 @@ class Crawler(object):
@staticmethod @staticmethod
def remove_unnecessary_updates( def remove_unnecessary_updates(
crawled_data: List[db.Record], crawled_data: list[db.Record],
identified_records: List[db.Record] identified_records: list[db.Record]
): ):
"""Compare the Records to be updated with their remote """Compare the Records to be updated with their remote
correspondant. Only update if there are actual differences. correspondant. Only update if there are actual differences.
@@ -896,7 +898,7 @@
         return db.Entity(id=id).retrieve()
     @staticmethod
-    def execute_inserts_in_list(to_be_inserted, securityMode, run_id: int = None,
+    def execute_inserts_in_list(to_be_inserted, securityMode, run_id: uuid.UUID = None,
                                 unique_names=True):
         for record in to_be_inserted:
             for prop in record.properties:
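The corrected hint matches the value that actually flows through these methods: earlier in the diff `run_id` is assigned from `uuid.uuid1()`, which returns a `uuid.UUID`, not an `int`. A small independent sketch (the helper below is hypothetical, not part of the crawler; strictly, a `None` default calls for `Optional[uuid.UUID]`, which the sketch uses):

```python
# Independent sketch: uuid.uuid1() yields a uuid.UUID object, so annotating
# run_id as int was misleading.  uuid.UUID is the type that reaches
# execute_inserts_in_list()/execute_updates_in_list() in the diff.
import uuid
from typing import Optional


def execute_in_list(records: list, run_id: Optional[uuid.UUID] = None) -> str:
    """Hypothetical helper mirroring the run_id handling in the diff."""
    rid = run_id if run_id is not None else uuid.uuid1()
    return f"processing {len(records)} records in run {rid}"


print(execute_in_list([], run_id=uuid.uuid1()))
```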
@@ -924,7 +926,7 @@
             _resolve_datatype(prop, entity)
     @staticmethod
-    def execute_updates_in_list(to_be_updated, securityMode, run_id: int = None,
+    def execute_updates_in_list(to_be_updated, securityMode, run_id: uuid.UUID = None,
                                 unique_names=True):
         Crawler.set_ids_and_datatype_of_parents_and_properties(to_be_updated)
         logger.debug("UPDATE")
@@ -936,7 +938,7 @@
             update_cache = UpdateCache()
             update_cache.insert(to_be_updated, run_id)
-    def _synchronize(self, crawled_data: List[db.Record], commit_changes: bool = True,
+    def _synchronize(self, crawled_data: list[db.Record], commit_changes: bool = True,
                      unique_names=True):
         """
         This function applies several stages:
@@ -1021,7 +1023,7 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
     @staticmethod
     def debug_build_usage_tree(converter: Converter):
-        res: Dict[str, Dict[str, Any]] = {
+        res: dict[str, dict[str, Any]] = {
             converter.name: {
                 "usage": ", ".join(converter.metadata["usage"]),
                 "subtree": {}
@@ -1038,7 +1040,7 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
         return res
     def save_debug_data(self, filename: str):
-        paths: Dict[str, Union[dict, list]] = dict()
+        paths: dict[str, Union[dict, list]] = dict()
         def flatten_debug_info(key):
             mod_info = self.debug_metadata[key]
@@ -1063,11 +1065,11 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
         with open(filename, "w") as f:
             f.write(yaml.dump(paths, sort_keys=False))
-    def _crawl(self, items: List[StructureElement],
-               local_converters: List[Converter],
+    def _crawl(self, items: list[StructureElement],
+               local_converters: list[Converter],
                generalStore: GeneralStore,
                recordStore: RecordStore,
-               structure_elements_path: List[str], converters_path: List[str]):
+               structure_elements_path: list[str], converters_path: list[str]):
         """
         Crawl a list of StructureElements and apply any matching converters.
...@@ -1155,7 +1157,7 @@ def crawler_main(crawled_directory_path: str, ...@@ -1155,7 +1157,7 @@ def crawler_main(crawled_directory_path: str,
provenance_file: str = None, provenance_file: str = None,
dry_run: bool = False, dry_run: bool = False,
prefix: str = "", prefix: str = "",
securityMode: int = SecurityMode.UPDATE, securityMode: SecurityMode = SecurityMode.UPDATE,
unique_names=True, unique_names=True,
): ):
""" """
......