Commit 2dca9c2e authored by Alexander Schlemmer

Merge branch 'f-global' into 'dev'

MAINT: remove global converters

See merge request !44
parents f6eec1b4 b572ef01
Referenced in 2 merge requests: !53 (Release 0.1) and !44 (MAINT: remove global converters)
Pipeline #28978 passed
@@ -20,7 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Changed

-* Renamed module from `newcrawler` to `caoscrawler`
+* MAINT: Renamed module from `newcrawler` to `caoscrawler`
+* MAINT: Removed global converters from `crawl.py`

 ### Deprecated
@@ -165,7 +165,6 @@ class Crawler(object):
     """

     def __init__(self,
-                 converters: List[Converter] = [],
                  generalStore: Optional[GeneralStore] = None,
                  debug: bool = False,
                  identifiableAdapter: IdentifiableAdapter = None,
@@ -176,8 +175,6 @@ class Crawler(object):
        Parameters
        ----------
-       converters : List[Converter]
-            The set of converters used for this crawler.
        recordStore : GeneralStore
             An initial GeneralStore which might store e.g. environment variables.
        debug : bool
@@ -197,7 +194,6 @@ class Crawler(object):
        """

        # TODO: check if this feature is really needed
-       self.global_converters = converters

        self.identified_cache = IdentifiedCache()
        self.recordStore = RecordStore()
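Taken together, the three hunks above remove the constructor-level converter list entirely. As a hedged call-site sketch (the import path follows the changelog entry above, i.e. module `caoscrawler`, file `crawl.py`; it assumes the constructor arguments not shown in these hunks keep their defaults):

from caoscrawler.crawl import Crawler

# Before this merge, converters could be injected when constructing the crawler:
#     crawler = Crawler(converters=[my_converter], debug=True)
# After this merge the parameter is gone; converters are built from the crawler
# definition instead (see the Converter.converter_factory hunk below).
crawler = Crawler(debug=True)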
@@ -409,8 +405,7 @@ class Crawler(object):
                continue
            elif key == "Converters":
                continue
-           converters.append(Converter.converter_factory(
-               value, key, converter_registry))
+           converters.append(Converter.converter_factory(value, key, converter_registry))
        return converters
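The hunk above only reflows the call that turns each entry of the crawler definition into a converter via `Converter.converter_factory`. A minimal stand-alone sketch of that factory-plus-registry pattern follows; all names in it (DemoConverter, the registry layout, the "type" key) are invented for illustration and are not the caoscrawler API.

from typing import Any, Dict, List


class DemoConverter:
    """Placeholder converter; real converters also carry match logic."""

    def __init__(self, definition: Dict[str, Any], name: str):
        self.definition = definition
        self.name = name
        # Sub-converters for child elements would be built the same way.
        self.converters: List["DemoConverter"] = []


def converter_factory(definition: Dict[str, Any], name: str,
                      registry: Dict[str, type]) -> DemoConverter:
    # Look up the concrete converter class by the definition's "type" entry.
    return registry[definition["type"]](definition, name)


converter_registry = {"Directory": DemoConverter, "File": DemoConverter}
crawler_definition = {"DataDir": {"type": "Directory"}}

converters = []
for key, value in crawler_definition.items():
    converters.append(converter_factory(value, key, converter_registry))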
@@ -448,12 +443,11 @@ class Crawler(object):
            crawler_definition, converter_registry)

        # This recursive crawling procedure generates the update list:
        self.target_data: List[db.Record] = []
-       self._crawl(items,
-                   self.global_converters, local_converters, self.generalStore, self.recordStore,
-                   [], [])
+       self._crawl(items, local_converters, self.generalStore,
+                   self.recordStore, [], [])

        if self.debug:
-           self.debug_converters = self.global_converters + local_converters
+           self.debug_converters = local_converters

        return self.target_data
@@ -947,7 +941,6 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
            f.write(yaml.dump(paths, sort_keys=False))

    def _crawl(self, items: List[StructureElement],
-              global_converters: List[Converter],
               local_converters: List[Converter],
               generalStore: GeneralStore,
               recordStore: RecordStore,
@@ -956,7 +949,7 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
        Crawl a list of StructureElements and apply any matching converters.

        items: structure_elements (e.g. files and folders on one level on the hierarchy)
-       global_converters and local_converters: globally or locally defined converters for
+       local_converters: locally defined converters for
                            treating structure elements. A locally defined converter could be
                            one that is only valid for a specific subtree of the originally
                            cralwed StructureElement structure.
@@ -964,7 +957,8 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
                            global stores of the Crawler object.
        """
        for element in items:
-           for converter in global_converters + local_converters:
+           for converter in local_converters:
+
                # type is something like "matches files", replace isinstance with "type_matches"
                # match function tests regexp for example
                if (converter.typecheck(element) and
@@ -1006,7 +1000,7 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3]))
                                           structure_elements_path + [element.get_name()],
                                           converters_path + [converter.name])

-                   self._crawl(children, global_converters, converter.converters,
+                   self._crawl(children, converter.converters,
                                generalStore_copy, recordStore_copy,
                                structure_elements_path + [element.get_name()],
                                converters_path + [converter.name])
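The remaining hunks change `_crawl` itself: with the global list gone, each recursion level only tries the converters local to that level, and a matching converter hands its own sub-converters to the children it creates, exactly as the docstring above describes for subtree-specific converters. Below is a self-contained sketch of that recursion pattern; every name in it (Element, DemoConverter, match, create_children) is made up for illustration and does not reflect the real caoscrawler classes.

from typing import List, Optional


class Element:
    def __init__(self, name: str, children: Optional[List["Element"]] = None):
        self.name = name
        self.children = children or []


class DemoConverter:
    def __init__(self, name: str, converters: Optional[List["DemoConverter"]] = None):
        self.name = name
        self.converters = converters or []   # converters valid only for this subtree

    def match(self, element: Element) -> bool:
        return True                          # real converters check element type and regexps

    def create_children(self, element: Element) -> List[Element]:
        return element.children


def crawl(items: List[Element], local_converters: List[DemoConverter],
          structure_elements_path: List[str], converters_path: List[str]) -> None:
    for element in items:
        for converter in local_converters:
            if not converter.match(element):
                continue
            children = converter.create_children(element)
            # Recurse with the converter's own sub-converters only; there is
            # no global converter list any more.
            crawl(children, converter.converters,
                  structure_elements_path + [element.name],
                  converters_path + [converter.name])


# Usage: the top level starts with the definition-derived converters and
# empty path lists, mirroring the call shown in the earlier hunk.
root = Element("data", [Element("run1"), Element("run2")])
crawl([root], [DemoConverter("DirConverter", [DemoConverter("FileConverter")])], [], [])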