diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a21627b47266a21a75a2b43f93d70f5b0beb105..63e69dacc4e0d00b208829c7d8bb1b9566233804 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Allow authorization of inserts * Allow splitting cfoods into multiple yaml documents * Implemented macros +* Converters can now filter the list of children ### Changed @@ -26,7 +27,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed -* Fixed #12 +* FIX: #12 * FIX: Variables are now also replaced when the value is given as a list. +* FIX: #35 Parent cannot be set from value +* [#6](https://gitlab.com/caosdb/caosdb-crawler/-/issues/6): Fixed many type + hints to be compatible with Python 3.8 + ### Security diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..95fc2bf61473b94decfb43d0c5ba0d3fda535a07 --- /dev/null +++ b/Makefile @@ -0,0 +1,48 @@ +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2020 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2020 Daniel Hornung <d.hornung@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# ** end header + +# This Makefile is a wrapper for several other scripts. 
+ +.PHONY: help + +help: + @echo 'Type `make doc` for documentation, or `make install` for (local) installation.' + +doc: + $(MAKE) -C src/doc html + +install: + @echo "Not implemented yet, use pip for installation." + +check: style lint +.PHONY: check + +style: + pycodestyle --count src unittests +.PHONY: style + +lint: + pylint --unsafe-load-any-extension=y -d all -e E,F src/caoscrawler +.PHONY: lint + +unittest: + tox -r +.PHONY: unittest diff --git a/integrationtests/test_realworld_example.py b/integrationtests/test_realworld_example.py index 5ec2f3219625937e3d18f31eaaa2eb71566c75d7..19a9aca2eaabce59b9570ade9ad7bae6eb43f9b9 100644 --- a/integrationtests/test_realworld_example.py +++ b/integrationtests/test_realworld_example.py @@ -29,7 +29,7 @@ import os import caosdb as db -from caoscrawler.crawl import Crawler, main as crawler_main +from caoscrawler.crawl import Crawler, crawler_main from caoscrawler.converters import JSONFileConverter, DictConverter from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter from caoscrawler.structure_elements import File, JSONFile, Directory diff --git a/integrationtests/test_use_case_simple_presentation.py b/integrationtests/test_use_case_simple_presentation.py index f1c838d1aadf4cb8b51043a8a24b93eddf275c75..bf16ef3be7179372ace7d05d67ffee33890fcc3c 100644 --- a/integrationtests/test_use_case_simple_presentation.py +++ b/integrationtests/test_use_case_simple_presentation.py @@ -32,7 +32,7 @@ from subprocess import run import caosdb as db from caosadvancedtools.loadFiles import loadpath from caosadvancedtools.models import parser as parser -from caoscrawler.crawl import main as crawler_main +from caoscrawler.crawl import crawler_main # TODO(fspreck) Re-eneable once this is part of dev in advancedusertools. 
diff --git a/setup.cfg b/setup.cfg index 9c652aa9ad32757075bd37f0bd5efeadcaa34582..0351d56dec59ee0b33c10be1f825e5d1d04f8504 100644 --- a/setup.cfg +++ b/setup.cfg @@ -22,7 +22,7 @@ install_requires = importlib-resources caosdb caosadvancedtools - yaml-header-tools + yaml-header-tools>=0.2.1 pyyaml odfpy #make optional pandas diff --git a/src/caoscrawler/converters.py b/src/caoscrawler/converters.py index e3f72b10ce2694853d6bc0644c736f0d621ed881..3fbf9939664af20f35150e5fff95854634ea3040 100644 --- a/src/caoscrawler/converters.py +++ b/src/caoscrawler/converters.py @@ -35,7 +35,7 @@ from .structure_elements import (StructureElement, Directory, File, Dict, JSONFi DictIntegerElement, DictBooleanElement, DictFloatElement, DictDictElement, TextElement, DictTextElement, DictElement, DictListElement) -from typing import Optional, Union +from typing import Dict as Dict_t, List, Optional, Tuple, Union from abc import abstractmethod from string import Template import yaml_header_tools @@ -50,6 +50,23 @@ SPECIAL_PROPERTIES = ("description", "name", "id", "path", "file", "checksum", "size") +def _only_max(children_with_keys): + """Return a list with the child that has the largest key, or [] if there is none.""" + return [max(children_with_keys, key=lambda x: x[1])[0]] if children_with_keys else [] + + +def _only_min(children_with_keys): + """Return a list with the child that has the smallest key, or [] if there is none.""" + return [min(children_with_keys, key=lambda x: x[1])[0]] if children_with_keys else [] + + +# names of functions that can be used to filter children +FILTER_FUNCTIONS = { + "only_max": _only_max, + "only_min": _only_min, +} + + def str_to_bool(x): if str(x).lower() == "true": return True @@ -67,6 +84,18 @@ class ConverterValidationError(Exception): def replace_variables(propvalue, values: GeneralStore): + """ + This function replaces variables in property values (and possibly other locations, + where the crawler can replace cfood-internal variables). + + This function checks whether the value that is to be replaced is of type db.Entity. 
+ In this case the entity is returned (note that this is of course only possible if the + value consists of nothing but the variable, i.e. no string concatenation is + attempted). + + In any other case the variable substitution is carried out and a new string with the + replaced variables is returned. + """ # Check if the replacement is a single variable containing a record: match = re.match(r"^\$(\{)?(?P<varname>[0-9a-zA-Z_]+)(\})?$", propvalue) if match is not None: @@ -213,8 +242,10 @@ def create_records(values: GeneralStore, # parents will be added when they aren't present in the record yet: if "parents" in record: for parent in record["parents"]: - if not has_parent(c_record, parent): - c_record.add_parent(parent) + # Do the variables replacement: + var_replaced_parent = replace_variables(parent, values) + if not has_parent(c_record, var_replaced_parent): + c_record.add_parent(var_replaced_parent) else: # add the "fallback" parent only for Records, not for Files: if role == "Record": @@ -236,7 +267,7 @@ class Converter(object): self.name = name # Used to store usage information for debugging: - self.metadata: dict[str, set[str]] = { + self.metadata: Dict_t[str, set[str]] = { "usage": set() } @@ -301,6 +332,31 @@ class Converter(object): records, self.definition["records"]) + def filter_children(self, children_with_strings: + List[Tuple[StructureElement, str]], expr: str, + group: str, rule: str): + """Filter children according to regexp `expr` and `rule`.""" + + if rule not in FILTER_FUNCTIONS: + raise RuntimeError( + f"{rule} is not a known filter rule. Only {list(FILTER_FUNCTIONS.keys())} are implemented." 
+ ) + + to_be_filtered = [] + unmatched_children = [] + + for (child, name) in children_with_strings: + + m = re.match(expr, name) + if m is None: + unmatched_children.append(child) + else: + to_be_filtered.append((child, m.groupdict()[group])) + + filtered_children = FILTER_FUNCTIONS[rule](to_be_filtered) + + return filtered_children+unmatched_children + @abstractmethod def typecheck(self, element: StructureElement): pass @@ -325,7 +381,15 @@ class DirectoryConverter(Converter): raise RuntimeError( "Directory converters can only create children from directories.") - return self.create_children_from_directory(element) + children = self.create_children_from_directory(element) + + if "filter" in self.definition: + + tuple_list = [(c, c.name) for c in children] + + return self.filter_children(tuple_list, **self.definition["filter"]) + + return children def typecheck(self, element: StructureElement): return isinstance(element, Directory) @@ -346,7 +410,7 @@ class DirectoryConverter(Converter): element: A directory (of type Directory) which will be traversed. """ - children: list[StructureElement] = [] + children: List[StructureElement] = [] for name in sorted(os.listdir(element.path)): path = os.path.join(element.path, name) @@ -395,7 +459,7 @@ class MarkdownFileConverter(Converter): header = yaml_header_tools.get_header_from_file( element.path, clean=False) - children: list[StructureElement] = [] + children: List[StructureElement] = [] for name, entry in header.items(): if type(entry) == list: @@ -449,6 +513,8 @@ class DictConverter(Converter): children.append(DictBooleanElement(name, value)) elif type(value) == float: children.append(DictFloatElement(name, value)) + elif value is None: + continue else: children.append(DictElement(name, value)) warnings.warn(f"The value in the dict for key:{name} has an unknown type. " @@ -675,7 +741,8 @@ class TableConverter(Converter): # The option can often either be a single value or a list of values. 
# In the latter case each element of the list will be converted to the defined type. if isinstance(el, list): - option_dict[opt_name] = [opt_conversion(el_el) for el_el in el] + option_dict[opt_name] = [ + opt_conversion(el_el) for el_el in el] else: option_dict[opt_name] = opt_conversion(el) return option_dict diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 701afba07ffc460efc13d0ea3aeea0ec054f45d7..c3381ed5f2a1ad746ab79208e9376e04dc28137a 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -50,7 +50,7 @@ from .identifiable_adapters import (IdentifiableAdapter, LocalStorageIdentifiableAdapter, CaosDBIdentifiableAdapter) from collections import defaultdict -from typing import Union, Any, Optional, Type +from typing import Any, Dict, List, Optional, Type, Union from caosdb.apiutils import compare_entities, merge_entities from copy import deepcopy from jsonschema import validate @@ -165,7 +165,7 @@ class Crawler(object): """ def __init__(self, - converters: list[Converter] = [], + converters: List[Converter] = [], generalStore: Optional[GeneralStore] = None, debug: bool = False, identifiableAdapter: IdentifiableAdapter = None, @@ -176,7 +176,7 @@ class Crawler(object): Parameters ---------- - converters : list[Converter] + converters : List[Converter] The set of converters used for this crawler. recordStore : GeneralStore An initial GeneralStore which might store e.g. environment variables. 
@@ -217,8 +217,8 @@ class Crawler(object): # order in the tuple: # 0: generalStore # 1: recordStore - self.debug_tree: dict[str, tuple] = dict() - self.debug_metadata: dict[str, dict] = dict() + self.debug_tree: Dict[str, tuple] = dict() + self.debug_metadata: Dict[str, dict] = dict() self.debug_metadata["copied"] = dict() self.debug_metadata["provenance"] = defaultdict(lambda: dict()) self.debug_metadata["usage"] = defaultdict(lambda: set()) @@ -298,7 +298,7 @@ class Crawler(object): """ # Defaults for the converter registry: - converter_registry: dict[str, dict[str, str]] = { + converter_registry: Dict[str, Dict[str, str]] = { "Directory": { "converter": "DirectoryConverter", "package": "caoscrawler.converters"}, @@ -409,7 +409,7 @@ class Crawler(object): return local_converters - def start_crawling(self, items: Union[list[StructureElement], StructureElement], + def start_crawling(self, items: Union[List[StructureElement], StructureElement], crawler_definition: dict, converter_registry: dict): """ @@ -442,7 +442,7 @@ class Crawler(object): local_converters = Crawler.create_local_converters(crawler_definition, converter_registry) # This recursive crawling procedure generates the update list: - self.target_data: list[db.Record] = [] + self.target_data: List[db.Record] = [] self._crawl(items, self.global_converters, local_converters, self.generalStore, self.recordStore, [], []) @@ -487,7 +487,7 @@ class Crawler(object): return False return True - def create_flat_list(self, ent_list: list[db.Entity], flat: list[db.Entity]): + def create_flat_list(self, ent_list: List[db.Entity], flat: List[db.Entity]): """ Recursively adds all properties contained in entities from ent_list to the output list flat. Each element will only be added once to the list. 
@@ -620,11 +620,11 @@ class Crawler(object): merge_entities(to, fro) - def split_into_inserts_and_updates(self, ent_list: list[db.Entity]): + def split_into_inserts_and_updates(self, ent_list: List[db.Entity]): if self.identifiableAdapter is None: raise RuntimeError("Should not happen.") - to_be_inserted: list[db.Entity] = [] - to_be_updated: list[db.Entity] = [] + to_be_inserted: List[db.Entity] = [] + to_be_updated: List[db.Entity] = [] flat = list(ent_list) # assure all entities are direct members TODO Can this be removed at some point?Check only? self.create_flat_list(ent_list, flat) @@ -752,8 +752,8 @@ class Crawler(object): el.value[index] = val.id @staticmethod - def remove_unnecessary_updates(target_data: list[db.Record], - identified_records: list[db.Record]): + def remove_unnecessary_updates(target_data: List[db.Record], + identified_records: List[db.Record]): """ checks whether all relevant attributes (especially Property values) are equal @@ -818,7 +818,7 @@ class Crawler(object): update_cache = UpdateCache() update_cache.insert(to_be_updated, run_id) - def _synchronize(self, target_data: list[db.Record], commit_changes: bool = True): + def _synchronize(self, target_data: List[db.Record], commit_changes: bool = True): """ This function applies several stages: 1) Retrieve identifiables for all records in target_data. 
@@ -837,7 +837,8 @@ class Crawler(object): if self.identifiableAdapter is None: raise RuntimeError("Should not happen.") - to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(target_data) + to_be_inserted, to_be_updated = self.split_into_inserts_and_updates( + target_data) # TODO: refactoring of typo for el in to_be_updated: @@ -845,14 +846,17 @@ class Crawler(object): self.replace_entities_with_ids(el) identified_records = [ - self.identifiableAdapter.retrieve_identified_record_for_record(record) + self.identifiableAdapter.retrieve_identified_record_for_record( + record) for record in to_be_updated] # remove unnecessary updates from list by comparing the target records to the existing ones self.remove_unnecessary_updates(to_be_updated, identified_records) if commit_changes: - self.execute_inserts_in_list(to_be_inserted, self.securityMode, self.run_id) - self.execute_updates_in_list(to_be_updated, self.securityMode, self.run_id) + self.execute_inserts_in_list( + to_be_inserted, self.securityMode, self.run_id) + self.execute_updates_in_list( + to_be_updated, self.securityMode, self.run_id) update_cache = UpdateCache() pending_inserts = update_cache.get_inserts(self.run_id) @@ -873,7 +877,8 @@ class Crawler(object): # only done in SSS mode if "SHARED_DIR" in os.environ: - filename = OldCrawler.save_form([el[3] for el in pending_changes], path, run_id) + filename = OldCrawler.save_form( + [el[3] for el in pending_changes], path, run_id) OldCrawler.send_mail([el[3] for el in pending_changes], filename) for i, el in enumerate(pending_changes): @@ -884,13 +889,14 @@ UNAUTHORIZED UPDATE ({} of {}): ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3])) logger.info("There were unauthorized changes (see above). 
An " "email was sent to the curator.\n" - "You can authorize the " + ("inserts" if inserts else "updates") + "You can authorize the " + + ("inserts" if inserts else "updates") + " by invoking the crawler" " with the run id: {rid}\n".format(rid=run_id)) @staticmethod def debug_build_usage_tree(converter: Converter): - res: dict[str, dict[str, Any]] = { + res: Dict[str, Dict[str, Any]] = { converter.name: { "usage": ", ".join(converter.metadata["usage"]), "subtree": {} @@ -907,7 +913,7 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3])) return res def save_debug_data(self, filename: str): - paths: dict[str, Union[dict, list]] = dict() + paths: Dict[str, Union[dict, list]] = dict() def flatten_debug_info(key): mod_info = self.debug_metadata[key] @@ -932,12 +938,12 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3])) with open(filename, "w") as f: f.write(yaml.dump(paths, sort_keys=False)) - def _crawl(self, items: list[StructureElement], - global_converters: list[Converter], - local_converters: list[Converter], + def _crawl(self, items: List[StructureElement], + global_converters: List[Converter], + local_converters: List[Converter], generalStore: GeneralStore, recordStore: RecordStore, - structure_elements_path: list[str], converters_path: list[str]): + structure_elements_path: List[str], converters_path: List[str]): """ Crawl a list of StructureElements and apply any matching converters. 
@@ -1017,14 +1023,14 @@ ____________________\n""".format(i + 1, len(pending_changes)) + str(el[3])) return self.target_data -def main(crawled_directory_path: str, - cfood_file_name: str, - identifiables_definition_file: str = None, - debug: bool = False, - provenance_file: str = None, - dry_run: bool = False, - prefix: str = "", - securityMode: int = SecurityMode.UPDATE): +def crawler_main(crawled_directory_path: str, + cfood_file_name: str, + identifiables_definition_file: str = None, + debug: bool = False, + provenance_file: str = None, + dry_run: bool = False, + prefix: str = "", + securityMode: int = SecurityMode.UPDATE): """ Parameters @@ -1138,7 +1144,7 @@ def parse_args(): return parser.parse_args() -if __name__ == "__main__": +def main(): args = parse_args() conlogger = logging.getLogger("connection") @@ -1151,7 +1157,7 @@ if __name__ == "__main__": else: logger.setLevel(logging.INFO) - sys.exit(main( + sys.exit(crawler_main( args.crawled_directory_path, args.cfood_file_name, args.load_identifiables, @@ -1163,3 +1169,7 @@ if __name__ == "__main__": "insert": SecurityMode.INSERT, "update": SecurityMode.UPDATE}[args.security_mode] )) + + +if __name__ == "__main__": + main() diff --git a/src/caoscrawler/structure_elements.py b/src/caoscrawler/structure_elements.py index 6be653a4758e8c3fb789b22ea655836a3d976c34..01996b4ff3e14a9739857e6e03ceca161300b37e 100644 --- a/src/caoscrawler/structure_elements.py +++ b/src/caoscrawler/structure_elements.py @@ -23,12 +23,15 @@ # ** end header # +from typing import Dict + + class StructureElement(object): """ base class for elements in the hierarchical data structure """ def __init__(self, name): # Used to store usage information for debugging: - self.metadata: dict[str, set[str]] = { + self.metadata: Dict[str, set[str]] = { "usage": set() } diff --git a/tox.ini b/tox.ini index 5ab67e67cfef0b3cf0cf82d2d28de0fe11aca6a1..101904b7de43fba6f04cf65641f555d79b0b080a 100644 --- a/tox.ini +++ b/tox.ini @@ -9,6 +9,7 @@ deps = . 
# TODO: Make this f-branch sensitive git+https://gitlab.indiscale.com/caosdb/src/caosdb-pylib.git@dev git+https://gitlab.indiscale.com/caosdb/src/caosdb-advanced-user-tools.git@dev -commands=py.test --cov=caosdb -vv {posargs} +commands= caosdb-crawler --help + py.test --cov=caosdb -vv {posargs} [flake8] max-line-length=100 diff --git a/unittests/test_converters.py b/unittests/test_converters.py index 7a6987b8b3fae9d747f2440de202df5d10a34cc0..15a39b72e75c8f26f3df80c24d48a4c5c2585029 100644 --- a/unittests/test_converters.py +++ b/unittests/test_converters.py @@ -26,16 +26,14 @@ test the converters module from caoscrawler.converters import Converter from caoscrawler.stores import GeneralStore -from caoscrawler.converters import (ConverterValidationError, - MarkdownFileConverter, JSONFileConverter, - DictConverter) -from caoscrawler.structure_elements import Directory +from caoscrawler.converters import (ConverterValidationError, DictConverter, + DirectoryConverter, handle_value, + MarkdownFileConverter, JSONFileConverter) from caoscrawler.structure_elements import (File, DictTextElement, DictListElement, DictElement, DictBooleanElement, DictDictElement, - DictIntegerElement, DictFloatElement) - -from caoscrawler.converters import handle_value + DictIntegerElement, + DictFloatElement, Directory) from test_tool import rfp @@ -272,3 +270,81 @@ def test_variable_replacement(): assert handle_value(["a", "b"], values) == (["a", "b"], "single") assert handle_value(["$a", "$b"], values) == (["4", "68"], "single") + + +def test_filter_children_of_directory(converter_registry): + """Verify that children (i.e., files) in a directory are filtered or sorted + correctly. 
+ + """ + test_dir = Directory("examples_filter_children", rfp( + "test_directories", "examples_filter_children")) + + dc = DirectoryConverter( + definition={ + "match": "(.*)", + "filter": { + "expr": "test_(?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}).json", + "group": "date", + "rule": "only_max" + } + }, + name="TestOnlyMaxDirectoryConverter", + converter_registry=converter_registry + ) + + m = dc.match(test_dir) + assert m is not None + + # This should only contain the youngest json and the csv that doesn't match + # the above filter expression. + children = dc.create_children(None, test_dir) + assert len(children) == 2 + assert children[0].__class__ == File + assert children[0].name == "test_2022-02-02.json" + assert children[1].__class__ == File + assert children[1].name == "some_other_file.csv" + + dc = DirectoryConverter( + definition={ + "match": "(.*)", + "filter": { + "expr": "test_(?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}).json", + "group": "date", + "rule": "only_min" + } + }, + name="TestOnlyMinDirectoryConverter", + converter_registry=converter_registry + ) + + m = dc.match(test_dir) + assert m is not None + + # This should only contain the oldest json and the csv that doesn't match + # the above filter expression. 
+ children = dc.create_children(None, test_dir) + assert len(children) == 2 + assert children[0].__class__ == File + assert children[0].name == "test_2022-01-01.json" + assert children[1].__class__ == File + assert children[1].name == "some_other_file.csv" + + dc = DirectoryConverter( + definition={ + "match": "(.*)", + "filter": { + "expr": "test_(?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}).json", + "group": "date", + "rule": "does_not_exist" + } + }, + name="TestBrokenDirectoryConverter", + converter_registry=converter_registry + ) + + m = dc.match(test_dir) + assert m is not None + + with pytest.raises(RuntimeError): + children = dc.create_children(None, test_dir) diff --git a/unittests/test_directories/example_substitutions/substitutions_parents.yml b/unittests/test_directories/example_substitutions/substitutions_parents.yml new file mode 100644 index 0000000000000000000000000000000000000000..107e766ccd833fab618cecfc04f13bc29abc80a6 --- /dev/null +++ b/unittests/test_directories/example_substitutions/substitutions_parents.yml @@ -0,0 +1,25 @@ + +ExperimentalData: # name of the converter + type: Directory + match: ExperimentalData + records: + Project: + name: project + subtree: + File: # name of the converter + type: SimpleFile + match: (?P<year>[0-9]{2,2})(?P<month>[0-9]{2,2})(?P<day>[0-9]{2,2})_data.dat + records: + Experiment: + parents: + - Experiment + - Month_$month # This adds a special parent as record type + date: 20$year-$month-$day + + ExperimentSeries: + Experiment: $Experiment + + Project: + Experiments: +$Experiment + dates: +20$year-$month-$day + diff --git a/unittests/test_directories/examples_filter_children/some_other_file.csv b/unittests/test_directories/examples_filter_children/some_other_file.csv new file mode 100644 index 0000000000000000000000000000000000000000..bc715fe81656397eae98aa4b04f9af2e3fdd9e43 --- /dev/null +++ b/unittests/test_directories/examples_filter_children/some_other_file.csv @@ -0,0 +1,2 @@ +some,other,data +1,2,3 diff 
--git a/unittests/test_directories/examples_filter_children/test_2022-01-01.json b/unittests/test_directories/examples_filter_children/test_2022-01-01.json new file mode 100644 index 0000000000000000000000000000000000000000..8de42f29d2eed374a0aba356c7fce2daa3e08e49 --- /dev/null +++ b/unittests/test_directories/examples_filter_children/test_2022-01-01.json @@ -0,0 +1,3 @@ +{ + "key": "value" +} diff --git a/unittests/test_directories/examples_filter_children/test_2022-01-02.json b/unittests/test_directories/examples_filter_children/test_2022-01-02.json new file mode 100644 index 0000000000000000000000000000000000000000..8de42f29d2eed374a0aba356c7fce2daa3e08e49 --- /dev/null +++ b/unittests/test_directories/examples_filter_children/test_2022-01-02.json @@ -0,0 +1,3 @@ +{ + "key": "value" +} diff --git a/unittests/test_directories/examples_filter_children/test_2022-02-02.json b/unittests/test_directories/examples_filter_children/test_2022-02-02.json new file mode 100644 index 0000000000000000000000000000000000000000..8de42f29d2eed374a0aba356c7fce2daa3e08e49 --- /dev/null +++ b/unittests/test_directories/examples_filter_children/test_2022-02-02.json @@ -0,0 +1,3 @@ +{ + "key": "value" +} diff --git a/unittests/test_directories/examples_json/testjson.json b/unittests/test_directories/examples_json/testjson.json index b893b608a6a2119c5c3252cd9cff4c4100f404da..d37ea2defc21d767e4e13ad3b39d6682b3c452ef 100644 --- a/unittests/test_directories/examples_json/testjson.json +++ b/unittests/test_directories/examples_json/testjson.json @@ -6,6 +6,7 @@ { "firstname": "Miri", "lastname": "Mueller", + "other": null, "email": "miri.mueller@science.de" }, { diff --git a/unittests/test_issues.py b/unittests/test_issues.py new file mode 100644 index 0000000000000000000000000000000000000000..6e77b0c7f26f4b2970203cfc4b8cc786fe24121b --- /dev/null +++ b/unittests/test_issues.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# This file is a part of the CaosDB Project. 
+# +# Copyright (C) 2022 IndiScale GmbH <info@indiscale.com> +# Copyright (C) 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# + +from pytest import mark + +from caoscrawler.crawl import Crawler +from caoscrawler.structure_elements import Dict +from test_tool import rfp + + +@mark.xfail( + reason="Wait until value conversion in dicts is fixed, see " + "https://gitlab.com/caosdb/caosdb-crawler/-/issues/10." 
+) +def test_issue_10(): + """Test integer-to-float conversion in dictionaries""" + crawler_definition = { + "DictTest": { + "type": "Dict", + "match": "(.*)", + "records": { + "TestRec": {} + }, + "subtree": { + "float_element": { + "type": "DictFloatElement", + "match_name": "float_value", + "match_value": "(?P<float_value>.*)", + "records": { + "TestRec": { + "float_prop": "$float_value" + } + } + } + } + } + } + + crawler = Crawler(debug=True) + converter_registry = crawler.load_converters(crawler_definition) + + test_dict = { + "float_value": 4 + } + + records = crawler.start_crawling( + Dict("TestDict", test_dict), crawler_definition, converter_registry) + assert len(records) == 1 + assert records[0].parents[0].name == "TestRec" + assert records[0].get_property("float_prop") is not None + assert float(records[0].get_property("float_prop").value) == 4.0 diff --git a/unittests/test_validation.py b/unittests/test_validation.py new file mode 100644 index 0000000000000000000000000000000000000000..686c66f72f55b66344322e0c6f3b9d1a2b76b3f9 --- /dev/null +++ b/unittests/test_validation.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# ** header v3.0 +# This file is a part of the CaosDB Project. +# +# Copyright (C) 2022 Alexander Schlemmer +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+# +# ** end header +# + +""" +Test the validation of cfood definition files. +""" + +from caoscrawler.crawl import Crawler + +from tempfile import NamedTemporaryFile + +import yaml +import pytest diff --git a/unittests/test_variable_substitutions.py b/unittests/test_variable_substitutions.py index 071bf4646d20e35ed05dafaf5fabf786dc182dcc..203197b7f8af51605a413ac354a0426d61c9c0cb 100644 --- a/unittests/test_variable_substitutions.py +++ b/unittests/test_variable_substitutions.py @@ -40,6 +40,15 @@ def crawler(): return crawler +@pytest.fixture +def crawler_2(): + crawler = Crawler(debug=True) + crawler.crawl_directory(rfp("test_directories", "example_substitutions", "ExperimentalData"), + rfp("test_directories", "example_substitutions", + "substitutions_parents.yml")) + return crawler + + def test_substitutions(crawler): # @review Florian Spreckelsen 2022-05-13 for i in range(2): @@ -59,3 +68,18 @@ def test_substitutions(crawler): assert isinstance(subd[i]["Project"].get_property("dates").value, list) assert subd[i]["Project"].get_property( "dates").value[0] == "2022-05-12" + + +def test_substitutions_parents(crawler_2): + # This is a test for: + # https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/35 + # ... testing whether variable substitutions can be used in parent declarations. + subd = crawler_2.debug_tree[dircheckstr( + "File", "ExperimentalData", "220512_data.dat")] + # subd[0] <- generalStore + # subd[1] <- recordStore + + parents = subd[1]["Experiment"].get_parents() + assert len(parents) == 2 + assert parents[0].name == "Experiment" + assert parents[1].name == "Month_05"