Commit 1a963c81 authored by Alexander Schlemmer

Merge branch 'f-type-hint-minor' into 'dev'

MAINT: correct type hints

See merge request !66
parents d86d2603 eb4a11e9
2 merge requests: !71 REL: Release v0.2.0, !66 MAINT: correct type hints
Pipeline #30565 passed
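
The recurring change in this merge request replaces typing.List and typing.Dict annotations with the built-in list and dict generics and tightens a few scalar hints (SecurityMode, uuid.UUID). The newly added `from __future__ import annotations` (PEP 563) is what keeps the built-in generics working on Python versions older than 3.9: with it, annotations are stored as strings and never evaluated at import time. A minimal sketch of that pattern follows; the function and names in it are illustrative only and do not occur in the crawler code.

# Illustrative sketch of the annotation style adopted in this commit;
# the names below are placeholders, not caoscrawler identifiers.
from __future__ import annotations  # PEP 563: annotations are kept as strings

from typing import Optional, Union


def group_by_name(items: list[dict], prefix: Optional[str] = None) -> dict[str, Union[dict, list]]:
    # The built-in generics in the signature are never evaluated at runtime
    # thanks to the future import, so this also runs on Python 3.7/3.8.
    grouped: dict[str, Union[dict, list]] = {}
    for item in items:
        key = f"{prefix or ''}{item.get('name', '')}"
        grouped[key] = item
    return grouped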
@@ -28,6 +28,7 @@ Crawl a file structure using a yaml cfood definition and synchronize
 the acuired data with CaosDB.
 """
 
+from __future__ import annotations
 import importlib
 from caosadvancedtools.cache import UpdateCache, Cache
 import uuid
@@ -50,7 +51,7 @@ from .identifiable_adapters import (IdentifiableAdapter,
                                     LocalStorageIdentifiableAdapter,
                                     CaosDBIdentifiableAdapter)
 from collections import defaultdict
-from typing import Any, Dict, List, Optional, Type, Union
+from typing import Any, Optional, Type, Union
 from caosdb.apiutils import compare_entities, merge_entities
 from copy import deepcopy
 from jsonschema import validate
@@ -168,7 +169,7 @@ class Crawler(object):
                  generalStore: Optional[GeneralStore] = None,
                  debug: bool = False,
                  identifiableAdapter: IdentifiableAdapter = None,
-                 securityMode: int = SecurityMode.UPDATE
+                 securityMode: SecurityMode = SecurityMode.UPDATE
                  ):
         """
         Create a new crawler and initialize an empty RecordStore and GeneralStore.
@@ -209,14 +210,14 @@ class Crawler(object):
         if identifiableAdapter is None:
             self.identifiableAdapter = LocalStorageIdentifiableAdapter()
         # If a directory is crawled this may hold the path to that directory
-        self.crawled_directory = None
+        self.crawled_directory: Optional[str] = None
         self.debug = debug
         if self.debug:
             # order in the tuple:
             # 0: generalStore
             # 1: recordStore
-            self.debug_tree: Dict[str, tuple] = dict()
-            self.debug_metadata: Dict[str, dict] = dict()
+            self.debug_tree: dict[str, tuple] = dict()
+            self.debug_metadata: dict[str, dict] = dict()
             self.debug_metadata["copied"] = dict()
             self.debug_metadata["provenance"] = defaultdict(lambda: dict())
             self.debug_metadata["usage"] = defaultdict(lambda: set())
@@ -236,7 +237,7 @@ class Crawler(object):
         return self._resolve_validator_paths(crawler_definition, crawler_definition_path)
 
-    def _load_definition_from_yaml_dict(self, crawler_definitions: List[Dict]):
+    def _load_definition_from_yaml_dict(self, crawler_definitions: list[dict]):
         """Load crawler definitions from a list of (yaml) dicts `crawler_definitions` which
         contains either one or two documents.
 
@@ -258,7 +259,7 @@ class Crawler(object):
         # tested in the next lines of code:
 
         # Load the cfood schema:
-        with open(files('caoscrawler').joinpath('cfood-schema.yml'), "r") as f:
+        with open(str(files('caoscrawler').joinpath('cfood-schema.yml')), "r") as f:
             schema = yaml.safe_load(f)
 
         # Add custom converters to converter enum in schema:
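
Presumably, the str() added around files('caoscrawler').joinpath('cfood-schema.yml') is there because files() returns a Traversable rather than a plain path, which static type checkers do not accept everywhere a path string is expected. A small sketch of the same pattern, assuming the caoscrawler package is installed:

# Sketch of the pattern used in the hunk above: convert the Traversable
# returned by files() into a plain string path before handing it to open().
# Requires caoscrawler to be installed; 'cfood-schema.yml' is the resource
# referenced in the diff.
import yaml
from importlib.resources import files  # Python 3.9+; importlib_resources backport on older versions

schema_path = str(files("caoscrawler").joinpath("cfood-schema.yml"))
with open(schema_path, "r") as f:
    schema = yaml.safe_load(f)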
@@ -315,7 +316,7 @@ class Crawler(object):
         """
 
         # Defaults for the converter registry:
-        converter_registry: Dict[str, Dict[str, str]] = {
+        converter_registry: dict[str, dict[str, str]] = {
             "Directory": {
                 "converter": "DirectoryConverter",
                 "package": "caoscrawler.converters"},
@@ -430,7 +431,7 @@ class Crawler(object):
         return converters
 
-    def start_crawling(self, items: Union[List[StructureElement], StructureElement],
+    def start_crawling(self, items: Union[list[StructureElement], StructureElement],
                        crawler_definition: dict,
                        converter_registry: dict):
         """
@@ -462,8 +463,9 @@ class Crawler(object):
         self.run_id = uuid.uuid1()
         local_converters = Crawler.initialize_converters(
             crawler_definition, converter_registry)
+
         # This recursive crawling procedure generates the update list:
-        self.crawled_data: List[db.Record] = []
+        self.crawled_data: list[db.Record] = []
         self._crawl(items, local_converters, self.generalStore,
                     self.recordStore, [], [])
 
@@ -501,7 +503,7 @@ class Crawler(object):
         return False
 
     @staticmethod
-    def create_flat_list(ent_list: List[db.Entity], flat: List[db.Entity]):
+    def create_flat_list(ent_list: list[db.Entity], flat: list[db.Entity]):
         """
         Recursively adds all properties contained in entities from ent_list to
         the output list flat. Each element will only be added once to the list.
@@ -688,11 +690,11 @@ class Crawler(object):
                 if p.value is old:
                     p.value = new
 
-    def split_into_inserts_and_updates(self, ent_list: List[db.Entity]):
+    def split_into_inserts_and_updates(self, ent_list: list[db.Entity]):
         if self.identifiableAdapter is None:
             raise RuntimeError("Should not happen.")
-        to_be_inserted: List[db.Entity] = []
-        to_be_updated: List[db.Entity] = []
+        to_be_inserted: list[db.Entity] = []
+        to_be_updated: list[db.Entity] = []
         flat = list(ent_list)
         # assure all entities are direct members TODO Can this be removed at some point?Check only?
         Crawler.create_flat_list(ent_list, flat)
@@ -720,7 +722,7 @@ class Crawler(object):
                     newrecord = self.get_from_any_cache(record)
                     merge_entities(newrecord, record)
                     Crawler.bend_references_to_new_object(
-                        old=record, new=newrecord, entities=flat+to_be_updated+to_be_inserted)
+                        old=record, new=newrecord, entities=flat + to_be_updated + to_be_inserted)
 
                     del flat[i]
                     resolved_references = True
@@ -781,8 +783,8 @@ class Crawler(object):
 
     @staticmethod
     def _merge_properties_from_remote(
-            crawled_data: List[db.Record],
-            identified_records: List[db.Record]
+            crawled_data: list[db.Record],
+            identified_records: list[db.Record]
     ):
         """Merge entity representation that was created by crawling the data with remotely found
         identified records s.th. new properties and property values are updated correctly but
@@ -823,8 +825,8 @@ class Crawler(object):
 
     @staticmethod
     def remove_unnecessary_updates(
-            crawled_data: List[db.Record],
-            identified_records: List[db.Record]
+            crawled_data: list[db.Record],
+            identified_records: list[db.Record]
     ):
         """Compare the Records to be updated with their remote
         correspondant. Only update if there are actual differences.
@@ -896,7 +898,7 @@ class Crawler(object):
         return db.Entity(id=id).retrieve()
 
     @staticmethod
-    def execute_inserts_in_list(to_be_inserted, securityMode, run_id: int = None,
+    def execute_inserts_in_list(to_be_inserted, securityMode, run_id: uuid.UUID = None,
                                 unique_names=True):
         for record in to_be_inserted:
             for prop in record.properties:
@@ -924,7 +926,7 @@ class Crawler(object):
                 _resolve_datatype(prop, entity)
 
     @staticmethod
-    def execute_updates_in_list(to_be_updated, securityMode, run_id: int = None,
+    def execute_updates_in_list(to_be_updated, securityMode, run_id: uuid.UUID = None,
                                 unique_names=True):
         Crawler.set_ids_and_datatype_of_parents_and_properties(to_be_updated)
         logger.debug("UPDATE")
@@ -936,7 +938,7 @@ class Crawler(object):
             update_cache = UpdateCache()
             update_cache.insert(to_be_updated, run_id)
 
-    def _synchronize(self, crawled_data: List[db.Record], commit_changes: bool = True,
+    def _synchronize(self, crawled_data: list[db.Record], commit_changes: bool = True,
                      unique_names=True):
         """
         This function applies several stages:
@@ -1021,7 +1023,7 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
 
     @staticmethod
     def debug_build_usage_tree(converter: Converter):
-        res: Dict[str, Dict[str, Any]] = {
+        res: dict[str, dict[str, Any]] = {
             converter.name: {
                 "usage": ", ".join(converter.metadata["usage"]),
                 "subtree": {}
@@ -1038,7 +1040,7 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
         return res
 
     def save_debug_data(self, filename: str):
-        paths: Dict[str, Union[dict, list]] = dict()
+        paths: dict[str, Union[dict, list]] = dict()
 
         def flatten_debug_info(key):
             mod_info = self.debug_metadata[key]
@@ -1063,11 +1065,11 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
         with open(filename, "w") as f:
             f.write(yaml.dump(paths, sort_keys=False))
 
-    def _crawl(self, items: List[StructureElement],
-               local_converters: List[Converter],
+    def _crawl(self, items: list[StructureElement],
+               local_converters: list[Converter],
                generalStore: GeneralStore,
                recordStore: RecordStore,
-               structure_elements_path: List[str], converters_path: List[str]):
+               structure_elements_path: list[str], converters_path: list[str]):
         """
         Crawl a list of StructureElements and apply any matching converters.
 
@@ -1155,7 +1157,7 @@ def crawler_main(crawled_directory_path: str,
                  provenance_file: str = None,
                  dry_run: bool = False,
                  prefix: str = "",
-                 securityMode: int = SecurityMode.UPDATE,
+                 securityMode: SecurityMode = SecurityMode.UPDATE,
                  unique_names=True,
                  ):
     """
…
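
The last hunk annotates securityMode with the SecurityMode enum itself instead of int, matching the Crawler constructor hunk further up. As a generic illustration of annotating a parameter with an enum type and defaulting to one of its members (the enum below is a made-up stand-in, not the crawler's SecurityMode):

# Illustrative only: an enum-typed parameter with an enum member as default,
# mirroring the securityMode hints in this diff. RunMode is a placeholder.
from enum import Enum, auto


class RunMode(Enum):
    RETRIEVE = auto()
    INSERT = auto()
    UPDATE = auto()


def run(mode: RunMode = RunMode.UPDATE) -> str:
    # Callers pass a RunMode member; the name is returned just for demonstration.
    return mode.name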