Skip to content
Snippets Groups Projects
Select Git revision
  • 1b8e77dfe1bbc9bc16758eb3637b7f24246c7311
  • main default protected
  • dev protected
  • f-fix-accent-sensitivity
  • f-filesystem-import
  • f-update-acl
  • f-filesystem-link
  • f-filesystem-directory
  • f-filesystem-core
  • f-filesystem-cleanup
  • f-string-ids
  • f-filesystem-main
  • f-multipart-encoding
  • f-trigger-advanced-user-tools
  • f-real-rename-test-pylibsolo2
  • f-real-rename-test-pylibsolo
  • f-real-rename-test
  • f-linkahead-rename
  • f-reference-record
  • f-xml-serialization
  • f-xfail-server-181
  • linkahead-pylib-v0.18.0
  • linkahead-control-v0.16.0
  • linkahead-pylib-v0.17.0
  • linkahead-mariadbbackend-v8.0.0
  • linkahead-server-v0.13.0
  • caosdb-pylib-v0.15.0
  • caosdb-pylib-v0.14.0
  • caosdb-pylib-v0.13.2
  • caosdb-server-v0.12.1
  • caosdb-pylib-v0.13.1
  • caosdb-pylib-v0.12.0
  • caosdb-server-v0.10.0
  • caosdb-pylib-v0.11.1
  • caosdb-pylib-v0.11.0
  • caosdb-server-v0.9.0
  • caosdb-pylib-v0.10.0
  • caosdb-server-v0.8.1
  • caosdb-pylib-v0.8.0
  • caosdb-server-v0.8.0
  • caosdb-pylib-v0.7.2
41 results

test_server_side_scripting.py

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    test_converters.py 14.78 KiB
    #!/usr/bin/env python3
    # encoding: utf-8
    #
    # This file is a part of the CaosDB Project.
    #
    # Copyright (C) 2021,2022 Indiscale GmbH <info@indiscale.com>
    # Copyright (C) 2021,2022 Henrik tom Wörden <h.tomwoerden@indiscale.com>
    #
    # This program is free software: you can redistribute it and/or modify
    # it under the terms of the GNU Affero General Public License as
    # published by the Free Software Foundation, either version 3 of the
    # License, or (at your option) any later version.
    #
    # This program is distributed in the hope that it will be useful,
    # but WITHOUT ANY WARRANTY; without even the implied warranty of
    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    # GNU Affero General Public License for more details.
    #
    # You should have received a copy of the GNU Affero General Public License
    # along with this program. If not, see <https://www.gnu.org/licenses/>.
    #
    
    """
    Tests for the converters module of the crawler.
    """
    import importlib
    import os
    import pytest
    import yaml
    
    from caoscrawler.converters import (Converter, ConverterValidationError, DictElementConverter,
                                        DirectoryConverter, DictIntegerElementConverter,
                                        handle_value, MarkdownFileConverter,
                                        FloatElementConverter, IntegerElementConverter,
                                        JSONFileConverter)
    from caoscrawler.converters import _AbstractScalarValueElementConverter
    from caoscrawler.crawl import Crawler
    from caoscrawler.stores import GeneralStore
    from caoscrawler.structure_elements import (File, TextElement, ListElement, DictElement,
                                                BooleanElement, IntegerElement,
                                                FloatElement, Directory)
    
    from test_tool import rfp
    
    
    @pytest.fixture
    def converter_registry():
        """Provide the converter registry used by the tests in this module.

        Each entry maps a converter type name to the name of its converter
        class and the package that contains it.  The class object itself is
        looked up with ``importlib`` and stored under the ``"class"`` key of
        the entry before the registry is returned.
        """
        converter_registry: dict[str, dict[str, str]] = {
            "Directory": {
                "converter": "DirectoryConverter",
                "package": "caoscrawler.converters"},
            "MarkdownFile": {
                "converter": "MarkdownFileConverter",
                "package": "caoscrawler.converters"},
            "DictElement": {
                "converter": "DictElementConverter",
                "package": "caoscrawler.converters"},
            # "TextElement" used to be listed twice; a duplicate key in a dict
            # literal is silently overwritten, so one entry is sufficient.
            "TextElement": {
                "converter": "TextElementConverter",
                "package": "caoscrawler.converters"},
            "ListElement": {
                "converter": "ListElementConverter",
                "package": "caoscrawler.converters"},
            "JSONFile": {
                "converter": "JSONFileConverter",
                "package": "caoscrawler.converters"},
        }

        # Only the entries are needed here, not the type names.
        for entry in converter_registry.values():
            module = importlib.import_module(entry["package"])
            entry["class"] = getattr(module, entry["converter"])
        return converter_registry
    
    
    def testConverterTrivial(converter_registry):
        """Check that the converter factory can build the basic converter types.

        The definition only contains the ``type``; the test passes when the
        factory does not raise for any of the listed types.
        """
        # "TextElement" was listed twice before; each type is built once now.
        types = (
            "Directory",
            "MarkdownFile",
            "TextElement",
            "ListElement",
        )

        for converter_type in types:
            Converter.converter_factory(
                definition={"type": converter_type},
                name="Test",
                converter_registry=converter_registry)
    
    
    def testDirectoryConverter(converter_registry):
        """Create the children of the "test_directories" folder and inspect them."""
        directory_converter = Converter.converter_factory(
            definition={"type": "Directory"},
            name="Test",
            converter_registry=converter_registry)
        root = Directory("test_directories", rfp("test_directories"))
        elements = directory_converter.create_children(GeneralStore(), root)

        # The folder has gained more sub-directories over time, so the number
        # of children is not pinned.  Only the element type and the presence
        # of some known directories are checked.
        seen_names = []
        for element in elements:
            assert isinstance(element, Directory)
            seen_names.append(element.name)

        for expected_name in ("examples_article", "example_overwrite_1", "example_insert"):
            assert expected_name in seen_names
    
    
    def test_markdown_converter(converter_registry):
        """Test matching and child creation of the MarkdownFileConverter.

        Two README files are used: in the first one "responsible" is expected
        to be a text element, in the second one a list element.
        """
        test_readme = File(
            "README.md",
            rfp(
                "test_directories", "examples_article", "DataAnalysis",
                "2020_climate-model-predict", "2020-02-08_prediction-errors", "README.md"
            )
        )

        converter = MarkdownFileConverter(
            {"match": "(.*)"},
            "TestMarkdownFileConverter",
            converter_registry)

        # test_tool.py is expected not to match, even with the catch-all pattern.
        m = converter.match(File("test_tool.py", rfp("test_tool.py")))
        assert m is None

        # A match without named groups yields an empty dict.
        m = converter.match(test_readme)
        assert m is not None
        assert isinstance(m, dict)
        assert len(m) == 0

        converter = MarkdownFileConverter(
            {"match": "README.md"},
            "TestMarkdownFileConverter",
            converter_registry)

        m = converter.match(test_readme)
        assert m is not None
        assert len(m) == 0

        children = converter.create_children(None, test_readme)
        assert len(children) == 5
        assert isinstance(children[1], TextElement)
        assert children[1].name == "description"
        assert isinstance(children[1].value, str)

        assert isinstance(children[0], TextElement)
        assert children[0].name == "responsible"
        assert isinstance(children[0].value, str)

        test_readme2 = File(
            "README.md",
            rfp("test_directories", "examples_article",
                "ExperimentalData", "2020_SpeedOfLight", "2020-01-01_TimeOfFlight", "README.md")
        )

        m = converter.match(test_readme2)
        assert m is not None
        assert len(m) == 0

        children = converter.create_children(None, test_readme2)
        assert len(children) == 2
        assert isinstance(children[1], TextElement)
        assert children[1].name == "description"
        assert isinstance(children[1].value, str)

        # In this README "responsible" is expected to be a list.
        assert isinstance(children[0], ListElement)
        assert children[0].name == "responsible"
        assert isinstance(children[0].value, list)
    
    
    def test_json_converter(converter_registry):
        """Test the JSONFileConverter together with schema validation.

        A valid JSON file is converted and every child of the resulting dict
        element is checked for its element class, the Python type of its value
        and, where the test pins one, the value itself.  A second file is
        expected to fail validation with a ConverterValidationError.
        """
        test_json = File("testjson.json", rfp(
            "test_directories", "examples_json", "testjson.json"))

        schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                   "test_directories", "examples_json", "testjson.schema.json")
        jsonconverter = JSONFileConverter(
            definition={"match": "(.*)", "validate": schema_path},
            name="TestJSONFileConverter",
            converter_registry=converter_registry)

        m = jsonconverter.match(test_json)
        assert m is not None
        assert len(m) == 0

        dict_el = jsonconverter.create_children(None, test_json)
        assert len(dict_el) == 1

        dictconverter = DictElementConverter(
            definition={"match_name": "(.*)"},
            name="dictconv",
            converter_registry=converter_registry)
        children = dictconverter.create_children(None, dict_el[0])

        # child name -> (expected element class, expected Python type of the value)
        expected_types = {
            "name": (TextElement, str),
            "projectId": (IntegerElement, int),
            "archived": (BooleanElement, bool),
            "Person": (ListElement, list),
            "start_date": (TextElement, str),
            "candidates": (ListElement, list),
            "rvalue": (FloatElement, float),
            "url": (TextElement, str),
        }
        # child name -> exact expected value (only for the children that are pinned)
        expected_values = {
            "name": "DEMO",
            "projectId": 10002,
            "archived": False,
            "start_date": '2022-03-01',
            "candidates": ["Mouse", "Penguine"],
        }

        for child in children:
            if child.name not in expected_types:
                # Name the offending child so that a failure is easy to diagnose.
                raise ValueError(f"Unexpected child element: {child.name!r}")
            element_class, value_type = expected_types[child.name]
            assert isinstance(child, element_class)
            assert isinstance(child.value, value_type)
            if child.name in expected_values:
                assert child.value == expected_values[child.name]
            if child.name == "Person":
                assert len(child.value) == 2

        broken_json = File(
            "brokenjson.json",
            rfp("test_directories", "examples_json", "brokenjson.json")
        )
        # The result of the match is not needed; only the call is kept.
        jsonconverter.match(broken_json)

        # Doesn't validate because of missing required 'name' property
        with pytest.raises(ConverterValidationError) as err:
            jsonconverter.create_children(None, broken_json)

        assert err.value.message.startswith("Couldn't validate")
    
    
    def test_variable_replacement():
        """Check how handle_value resolves variables and collection modes.

        Covered are the string form with an optional "+" / "*" prefix, the dict
        form with an explicit ``collection_mode`` and plain lists of values.
        """
        values = GeneralStore()
        values["a"] = 4
        values["b"] = "68"

        # (raw value, expected result): "b" is a literal, "$b" a variable reference.
        raw_and_resolved = (("b", "b"), ("$b", "68"))
        # (prefix of the string form, collection mode it selects)
        prefix_and_mode = (("", "single"), ("+", "list"), ("*", "multiproperty"))

        # String form: the prefix selects the collection mode.
        for raw, resolved in raw_and_resolved:
            for prefix, mode in prefix_and_mode:
                assert handle_value(prefix + raw, values) == (resolved, mode)

        # Dict form: the collection mode is given explicitly.
        for raw, resolved in raw_and_resolved:
            for _, mode in prefix_and_mode:
                definition = {"value": raw, "collection_mode": mode}
                assert handle_value(definition, values) == (resolved, mode)

        # Lists are resolved element by element.
        assert handle_value(["a", "b"], values) == (["a", "b"], "single")
        assert handle_value(["$a", "$b"], values) == (["4", "68"], "single")
    
    
    def test_filter_children_of_directory(converter_registry):
        """Verify that children (i.e., files) in a directory are filtered or sorted
        correctly.

        The rules "only_max" and "only_min" are checked, as well as the error
        raised for an unknown rule.
        """
        test_dir = Directory("examples_filter_children", rfp(
            "test_directories", "examples_filter_children"))

        def make_converter(rule, name):
            """Return a DirectoryConverter that filters on the date with ``rule``."""
            return DirectoryConverter(
                definition={
                    "match": "(.*)",
                    "filter": {
                        "expr": "test_(?P<date>[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}).json",
                        "group": "date",
                        "rule": rule
                    }
                },
                name=name,
                converter_registry=converter_registry
            )

        # "only_max" keeps the json with the latest date, "only_min" the one with
        # the earliest date.  In both cases the csv, which doesn't match the
        # filter expression, is kept as well.
        expectations = (
            ("only_max", "TestOnlyMaxDirectoryConverter", "test_2022-02-02.json"),
            ("only_min", "TestOnlyMinDirectoryConverter", "test_2022-01-01.json"),
        )
        for rule, name, expected_json in expectations:
            dc = make_converter(rule, name)

            m = dc.match(test_dir)
            assert m is not None

            children = dc.create_children(None, test_dir)
            assert len(children) == 2
            assert isinstance(children[0], File)
            assert children[0].name == expected_json
            assert isinstance(children[1], File)
            assert children[1].name == "some_other_file.csv"

        # An unknown rule must lead to an error when the children are created.
        dc = make_converter("does_not_exist", "TestBrokenDirectoryConverter")

        m = dc.match(test_dir)
        assert m is not None

        with pytest.raises(RuntimeError):
            dc.create_children(None, test_dir)
    
    
    def test_validate_custom_converters():
        """Check that a one-document and a two-document definition are equivalent.

        Custom converters may be declared next to the elements in a single YAML
        document or in the ``metadata`` of a separate first document.
        """
        one_doc_yaml = """
    Converters:
      MyNewType:
        converter: MyNewTypeConverter
        package: some_package.my_converters
    MyElement:
      type: MyNewType
      match: something
        """
        single_document = yaml.safe_load(one_doc_yaml)
        one_doc_definitions = Crawler()._load_definition_from_yaml_dict([single_document])
        assert "MyElement" in one_doc_definitions
        assert one_doc_definitions["MyElement"]["type"] == "MyNewType"

        # this has to be equivalent
        two_doc_yaml = """
    ---
    metadata:
      Converters:
        MyNewType:
          converter: MyNewTypeConverter
          package: some_package.my_converters
    ---
    MyElement:
      type: MyNewType
      match: something
        """
        all_documents = list(yaml.safe_load_all(two_doc_yaml))
        two_doc_definitions = Crawler()._load_definition_from_yaml_dict(all_documents)
        assert "MyElement" in two_doc_definitions
        expected_type = one_doc_definitions["MyElement"]["type"]
        assert two_doc_definitions["MyElement"]["type"] == expected_type
    
    
    def test_abstract_dict_element_converter():
        """Match a multi-line text value and extract the named group "text"."""
        converter_definition = yaml.safe_load("""
    match_name: text
    match_value: .*begin(?P<text>.*)end
    accept_text: True
        """)
        # No converter registry is passed: this is possible when "subtree" is
        # not used.
        scalar_converter = _AbstractScalarValueElementConverter(
            converter_definition, "test_converter", None)

        multiline_element = TextElement("text", """
    begin
    bla
    end""")
        matched_groups = scalar_converter.match(multiline_element)
        assert matched_groups is not None
        assert matched_groups["text"] == "\nbla\n"
    
    
    def test_converter_value_match(converter_registry):
        """Check which element types the float and integer converters accept."""
        # Name and value patterns shared by all converters in this test.
        catch_all = {"match_name": "(.*)", "match_value": "(.*)"}

        # test with defaults
        float_converter = FloatElementConverter(
            definition=dict(catch_all),
            name="Test",
            converter_registry=converter_registry)
        int_element_match = float_converter.match(IntegerElement(name="a", value=4))
        assert int_element_match is not None

        # overwrite default with no match for int
        strict_float_converter = FloatElementConverter(
            definition={**catch_all, "accept_int": False},
            name="Test",
            converter_registry=converter_registry)
        assert strict_float_converter.typecheck(IntegerElement(name="a", value=4)) is False

        # overwrite default with match for float
        lenient_int_converter = IntegerElementConverter(
            definition={**catch_all, "accept_float": True},
            name="Test",
            converter_registry=converter_registry)
        float_element_match = lenient_int_converter.match(FloatElement(name="a", value=4.0))
        assert float_element_match is not None