Skip to content
Snippets Groups Projects
Select Git revision
  • 2aa2f759d133faa988ff3fb35384d033b754efb0
  • main default protected
  • dev
  • f-spss-value-label-name
  • f-unmod
  • f-checkidentical
  • f-simple-breakpoint
  • f-new-debug-tree
  • f-existing-file-id
  • f-no-ident
  • f-collect-problems
  • f-refactor-debug-tree
  • v0.13.0
  • v0.12.0
  • v0.11.0
  • v0.10.1
  • v0.10.0
  • v0.9.1
  • v0.9.0
  • v0.8.0
  • v0.7.1
  • v0.7.0
  • v0.6.0
  • v0.5.0
  • v0.4.0
  • v0.3.0
  • v0.2.0
  • v0.1.0
28 results

test_tool.py

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    test_issues.py 13.39 KiB
    # This file is a part of the CaosDB Project.
    #
    # Copyright (C) 2022 Indiscale GmbH <info@indiscale.com>
    #               2022 Florian Spreckelsen <f.spreckelsen@indiscale.com>
    #
    # This program is free software: you can redistribute it and/or modify
    # it under the terms of the GNU Affero General Public License as
    # published by the Free Software Foundation, either version 3 of the
    # License, or (at your option) any later version.
    #
    # This program is distributed in the hope that it will be useful,
    # but WITHOUT ANY WARRANTY; without even the implied warranty of
    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    # GNU Affero General Public License for more details.
    #
    # You should have received a copy of the GNU Affero General Public License
    # along with this program. If not, see <https://www.gnu.org/licenses/>.
    #
    from pytest import fixture, mark, raises
    
    import linkahead as db
    from linkahead.cached import cache_clear
    from caosadvancedtools.models.parser import parse_model_from_string
    
    from caoscrawler.crawl import Crawler
    from caoscrawler.identifiable import Identifiable
    from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
    from caoscrawler.structure_elements import DictElement
    
    from caoscrawler.scanner import create_converter_registry, scan_structure_elements
    
    from linkahead.utils.register_tests import clear_database, set_test_key
    # Executed at import time, i.e. before any test in this module runs.
    # NOTE(review): presumably this key has to match the one registered on the
    # test server so that the `clear_database` fixture is allowed to wipe it --
    # confirm against linkahead.utils.register_tests.
    set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
    
    
    @fixture(autouse=True)
    def clear_cache():
        """Clear the LinkAhead cache.

        Declared with ``autouse=True``, so pytest applies it to every test in
        this module without the test having to request it explicitly.
        """
        cache_clear()
    
    
    def test_issue_23(clear_database):
        """Check that an update keeps properties which already exist on the
        server but were not found by the crawler.

        A stored record carrying ``prop_a`` is crawled again with ``prop_b``
        only; after synchronization the stored record must carry both.

        See issue https://gitlab.com/caosdb/caosdb-crawler/-/issues/23

        """

        # Pre-existing state: a simplistic model and a record of type TestType
        # with the identifying property and prop_a, but without prop_b.
        model = [
            db.Property(name="identifying_prop", datatype=db.TEXT),
            db.Property(name="prop_a", datatype=db.TEXT),
            db.Property(name="prop_b", datatype=db.TEXT),
        ]
        test_type = db.RecordType(name="TestType")
        stored = (
            db.Record(name="TestRec")
            .add_parent(test_type)
            .add_property(name="identifying_prop", value="identifier")
            .add_property(name="prop_a", value="something")
        )
        db.Container().extend([*model, test_type, stored]).insert()

        # The cfood describes a TestType record with identifying_prop and
        # prop_b, but not prop_a.  Each leaf fills one property of the record.
        ident_leaf = {
            "type": "TextElement",
            "match_name": "ident",
            "match_value": "(?P<ident_value>.*)",
            "records": {"TestType": {"identifying_prop": "$ident_value"}},
        }
        prop_b_leaf = {
            "type": "TextElement",
            "match_name": "prop_b",
            "match_value": "(?P<other_value>.*)",
            "records": {"TestType": {"prop_b": "$other_value"}},
        }
        cfood = {
            "DictTest": {
                "type": "DictElement",
                "match": "(.*)",
                "records": {"TestType": {}},
                "subtree": {
                    "identifying_element": ident_leaf,
                    "other_element": prop_b_leaf,
                },
            }
        }

        # TestType records are identified by identifying_prop alone.
        adapter = CaosDBIdentifiableAdapter()
        identifiable_def = db.RecordType().add_parent(
            name="TestType").add_property(name="identifying_prop")
        adapter.register_identifiable("TestType", identifiable_def)

        crawler = Crawler(identifiableAdapter=adapter)
        registry = create_converter_registry(cfood)

        # The data to be crawled.
        payload = {
            "ident": "identifier",
            "prop_b": "something_else"
        }

        crawler.generate_run_id()
        root = DictElement("TestDict", payload)
        records = scan_structure_elements(root, cfood, registry)

        assert len(records) == 1
        crawled = records[0]
        assert crawled.parents[0].name == "TestType"
        expected_values = (
            ("identifying_prop", "identifier"),
            ("prop_b", "something_else"),
        )
        for prop_name, expected in expected_values:
            crawled_prop = crawled.get_property(prop_name)
            assert crawled_prop is not None
            assert crawled_prop.value == expected
        # The database has not been touched yet, so prop_a cannot be there.
        assert crawled.get_property("prop_a") is None

        # Synchronize with the database; this must update the stored record.
        inserts, updates = crawler.synchronize(crawled_data=records)
        assert len(inserts) == 0
        assert len(updates) == 1

        # The retrieved record must combine the name and properties of the
        # stored record with the newly crawled property.
        retrieved = db.Record(id=stored.id).retrieve()
        assert retrieved.name == stored.name
        for prop_name in ("identifying_prop", "prop_a"):
            assert retrieved.get_property(
                prop_name).value == stored.get_property(prop_name).value
        for prop_name in ("identifying_prop", "prop_b"):
            assert retrieved.get_property(
                prop_name).value == crawled.get_property(prop_name).value
    
    
    def test_issue_83(clear_database):
        """Referenced entities may share a name as long as the name is not part
        of their identifiable.

        See https://gitlab.com/linkahead/linkahead-crawler/-/issues/83

        """

        # Very simple data model: ReferencedType has an obligatory integer
        # IdentifyingProp, ReferencingType holds a list of ReferencedType.
        id_prop = db.Property(name="IdentifyingProp", datatype=db.INTEGER).insert()
        id_name = id_prop.name
        target_rt = db.RecordType(name="ReferencedType").add_property(
            name=id_name, importance=db.OBLIGATORY).insert()
        target_name = target_rt.name
        source_rt = db.RecordType(name="ReferencingType").add_property(
            name=target_name, datatype=db.LIST(target_name)).insert()
        source_name = source_rt.name

        # Identifiables: ReferencedType by IdentifyingProp (not by name),
        # ReferencingType by name.
        adapter = CaosDBIdentifiableAdapter()
        for type_name, identifying in ((target_name, id_name), (source_name, "name")):
            adapter.register_identifiable(
                type_name,
                db.RecordType().add_parent(name=type_name).add_property(name=identifying))

        crawler = Crawler(identifiableAdapter=adapter)

        def make_target(value):
            """Build a ReferencedType record named "RefTarget" with the given id value."""
            return db.Record(name="RefTarget").add_parent(
                name=target_name).add_property(name=id_name, value=value)

        def make_referencing(name, targets):
            """Build a ReferencingType record pointing at the given targets."""
            return db.Record(name=name).add_parent(
                name=source_name).add_property(name=target_name, value=targets)

        # Both targets deliberately carry the same name.
        target_a = make_target(1)
        target_b = make_target(2)
        sources = [
            make_referencing("Referencing1", [target_a]),
            make_referencing("Referencing2", [target_b]),
            make_referencing("Referencing3", [target_a, target_b]),
        ]

        records = db.Container().extend([target_a, target_b, *sources])

        inserts, updates = crawler.synchronize(crawled_data=records, unique_names=False)
        assert len(inserts) == len(records)
        assert len(updates) == 0

        def find_target(value):
            """Query the single ReferencedType record with the given id value."""
            return db.execute_query(
                f"FIND {target_name} WITH {id_name}={value}", unique=True)

        def find_referencing(rec):
            """Query the single ReferencingType record with the name of ``rec``."""
            return db.execute_query(
                f"FIND {source_name} WITH name={rec.name}", unique=True)

        # Two distinct entities that share one name.
        found_a = find_target(1)
        found_b = find_target(2)
        assert found_b.name == found_a.name
        assert found_a.name == target_a.name
        assert found_a.id != found_b.id

        # Each single-reference record points at its own target only.
        single_refs = (
            (sources[0], found_a, found_b),
            (sources[1], found_b, found_a),
        )
        for source, own, other in single_refs:
            refs = find_referencing(source).get_property(target_name)
            assert refs is not None
            assert refs.value == [own.id]
            assert refs.value != [other.id]

        # The third record references both targets.
        both_refs = find_referencing(sources[2]).get_property(target_name)
        assert both_refs is not None
        assert len(both_refs.value) == 2
        assert found_a.id in both_refs.value
        assert found_b.id in both_refs.value
    
    
    def test_indiscale_113(clear_database):
        """Somewhat mysterious failures to resolve references in
        split_into_inserts_and_updates, see
        https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/113

        Setup: a ``Basis`` and a ``Campaign`` record are inserted directly.
        Then an ``Event`` referencing both is synchronized together with
        freshly constructed ``Basis``/``Campaign`` records of the same names.
        The test expects exactly one insert (the event) and no updates.

        """

        # Create and insert minimal datamodel
        datamodel_str = """
    Event:
      recommended_properties:
        Basis:
        Campaign:
    Basis:
    Campaign:
      recommended_properties:
        Basis:
    """
        model = parse_model_from_string(datamodel_str)
        model.sync_data_model(noquestion=True)

        # Register identifiables, everything is identified by name
        ident = CaosDBIdentifiableAdapter()
        ident.register_identifiable("Event", db.RecordType().add_parent(
            name="Event").add_property(name="name"))
        ident.register_identifiable("Basis", db.RecordType().add_parent(
            name="Basis").add_property(name="name"))
        ident.register_identifiable("Campaign", db.RecordType().add_parent(
            name="Campaign").add_property(name="name"))

        crawler = Crawler(identifiableAdapter=ident)

        # Add records: event references basis and campaign, campaign references
        # basis.
        basis = db.Record(name="Poseidon").add_parent(name="Basis")
        campaign = db.Record(name="POS386").add_parent(
            name="Campaign").add_property(name="Basis", value=basis)
        event = db.Record(name="GeoB13952").add_parent(name="Event")
        event.add_property(name="Basis", value=basis)
        event.add_property(name="Campaign", value=campaign)

        # basis and campaign already exist in the db
        db.Container().extend([basis, campaign]).insert()
        # redefine to trigger resolving
        # NOTE(review): `event` was built above, so its properties still hold
        # the first `basis`/`campaign` objects (the ones just inserted), not
        # the new objects bound below -- presumably intentional; confirm
        # against the issue before reordering anything here.
        basis = db.Record(name="Poseidon").add_parent(name="Basis")
        campaign = db.Record(name="POS386").add_parent(
            name="Campaign").add_property(name="Basis", value=basis)
        recs = [event, basis, campaign]

        ins, ups = crawler.synchronize(crawled_data=recs, unique_names=False)
        # There is only one event to be inserted
        assert len(ins) == 1
        # Nothing to do for the existing ents
        assert len(ups) == 0
        # The single inserted entity must be the event.
        assert ins[0].name == event.name
    
    
    def test_indiscale_87(clear_database):
        """Handle long string queries gracefully.

        Three records are stored whose identifying text property ``str`` is a
        260-character string; two of these strings share their first 250
        characters and differ only in the last ten.  Each record must be
        retrieved unambiguously through its identifiable.  Afterwards two
        records with the *same* long string but different ``someint`` values
        are stored: identifying by ``str`` alone must fail as ambiguous, while
        adding ``someint`` to the identifiable must resolve each record again.

        https://gitlab.com/linkahead/linkahead-crawler/-/issues/87
        """

        prop = db.Property(name="str", datatype=db.TEXT).insert()
        rt = db.RecordType(name="RT1").add_property(prop).insert()
        strings = [
            "X123456789" * 26,
            "X" * 260,
            "X123456789" * 25 + "9876543210",
        ]
        recs = [
            db.Record().add_parent(rt).add_property(name="str", value=string).insert()
            for string in strings
        ]
        idents = [
            Identifiable(record_type="RT1", properties={"str": string})
            for string in strings
        ]
        adapter = CaosDBIdentifiableAdapter()

        def assert_resolves_to(rec, ident):
            """Retrieve the record for ``ident`` and require an empty diff to ``rec``.

            The diff is printed before the assertion so that it shows up in
            pytest's captured output when the assertion fails.
            """
            retrieved = adapter.retrieve_identified_record_for_identifiable(ident)
            print(db.apiutils.compare_entities(rec, retrieved))
            assert db.apiutils.empty_diff(rec, retrieved)
            print("---")

        for rec, ident in zip(recs, idents):
            print(f"Testing: ...{rec.get_property('str').value[-10:]}")
            assert_resolves_to(rec, ident)

        # add another, harmless, property
        prop2 = db.Property(name="someint", datatype=db.INTEGER).insert()
        rt.add_property(prop2).update()
        string = "Y123456789" * 26
        numbers = [23, 42]
        recs = [
            db.Record().add_parent(rt).add_property(name="str", value=string).add_property(
                name="someint", value=number).insert()
            for number in numbers
        ]

        # Ambiguous result: both new records share the same "str" value.  The
        # identifiable is built outside the `raises` block so that only the
        # retrieval itself can satisfy the expected exception.
        ambiguous_ident = Identifiable(record_type="RT1", properties={"str": string})
        with raises(RuntimeError, match=".*unambiguously.*"):
            adapter.retrieve_identified_record_for_identifiable(ambiguous_ident)

        # Upgrade new property to be identifying
        idents = [
            Identifiable(record_type="RT1", properties={"str": string, "someint": number})
            for number in numbers
        ]
        for rec, ident in zip(recs, idents):
            print(f"Testing: someint={rec.get_property('someint').value}")
            assert_resolves_to(rec, ident)