# This file is a part of the CaosDB Project.
#
# Copyright (C) 2022 Indiscale GmbH <info@indiscale.com>
# 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
import tempfile

import linkahead as db
from caosadvancedtools.models.parser import parse_model_from_string
from caoscrawler.crawl import Crawler
from caoscrawler.identifiable import Identifiable
from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
from caoscrawler.scanner import (create_converter_registry,
                                 scan_structure_elements)
from caoscrawler.structure_elements import DictElement
from linkahead.cached import cache_clear
from linkahead.utils.register_tests import clear_database, set_test_key
from pytest import fixture, mark, raises
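
# The test key marks the target server as a disposable test instance so that
# the clear_database fixture from linkahead.utils.register_tests is allowed to
# wipe it between tests.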
set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")


@fixture(autouse=True)
def clear_cache():
    """Clear the LinkAhead cache."""
    cache_clear()


def test_issue_23(clear_database):
    """Test that an update leaves existing properties that were not found by
    the crawler unchanged.

    See issue https://gitlab.com/caosdb/caosdb-crawler/-/issues/23
    """
    # insert a simplistic model and a record of type TestType with the
    # identifying property and prop_a, but not prop_b.
    prop_ident = db.Property(name="identifying_prop", datatype=db.TEXT)
    prop_a = db.Property(name="prop_a", datatype=db.TEXT)
    prop_b = db.Property(name="prop_b", datatype=db.TEXT)
    rt = db.RecordType(name="TestType")
    rec = db.Record(name="TestRec").add_parent(rt)
    rec.add_property(name="identifying_prop", value="identifier")
    rec.add_property(name="prop_a", value="something")
    db.Container().extend([prop_ident, prop_a, prop_b, rt, rec]).insert()

    # set up crawler, first cfood defining a TestType record with
    # identifying_prop and prop_b, but not prop_a ...
    crawler_definition = {
        "DictTest": {
            "type": "DictElement",
            "match": "(.*)",
            "records": {
                "TestType": {}
            },
            "subtree": {
                "identifying_element": {
                    "type": "TextElement",
                    "match_name": "ident",
                    "match_value": "(?P<ident_value>.*)",
                    "records": {
                        "TestType": {
                            "identifying_prop": "$ident_value"
                        }
                    }
                },
                "other_element": {
                    "type": "TextElement",
                    "match_name": "prop_b",
                    "match_value": "(?P<other_value>.*)",
                    "records": {
                        "TestType": {
                            "prop_b": "$other_value"
                        }
                    }
                }
            }
        }
    }

    # register identifiable for TestType
    ident = CaosDBIdentifiableAdapter()
    ident.register_identifiable("TestType", db.RecordType().add_parent(
        name="TestType").add_property(name="identifying_prop"))

    crawler = Crawler(identifiableAdapter=ident)
    converter_registry = create_converter_registry(crawler_definition)

    # the dictionary to be crawled...
    test_dict = {
        "ident": "identifier",
        "prop_b": "something_else"
    }
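
    # Generate a run id for this crawl; the scan below works purely on the
    # in-memory dictionary and does not touch the database yet.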
    crawler.generate_run_id()
    records = scan_structure_elements(
        DictElement("TestDict", test_dict), crawler_definition, converter_registry)
    assert len(records) == 1
    rec_crawled = records[0]
    assert rec_crawled.parents[0].name == "TestType"
    assert rec_crawled.get_property("identifying_prop") is not None
    assert rec_crawled.get_property("identifying_prop").value == "identifier"
    assert rec_crawled.get_property("prop_b") is not None
    assert rec_crawled.get_property("prop_b").value == "something_else"
    # no interaction with the database yet, so the record shouldn't have a prop_a yet
    assert rec_crawled.get_property("prop_a") is None

    # synchronize with database and update the record
    ins, ups = crawler.synchronize(crawled_data=records)
    assert len(ins) == 0
    assert len(ups) == 1

    # retrieve and check that name and properties have been combined correctly
    rec_retrieved = db.Record(id=rec.id).retrieve()
    assert rec_retrieved.name == rec.name
    assert rec_retrieved.get_property(
        "identifying_prop").value == rec.get_property("identifying_prop").value
    assert rec_retrieved.get_property(
        "prop_a").value == rec.get_property("prop_a").value
    assert rec_retrieved.get_property(
        "identifying_prop").value == rec_crawled.get_property("identifying_prop").value
    assert rec_retrieved.get_property(
        "prop_b").value == rec_crawled.get_property("prop_b").value


def test_issue_83(clear_database):
    """https://gitlab.com/linkahead/linkahead-crawler/-/issues/83. Test that
    names don't need to be unique for referenced entities if they are not part
    of the identifiable.
    """
    # Very simple data model
    identifying_prop = db.Property(name="IdentifyingProp", datatype=db.INTEGER).insert()
    referenced_type = db.RecordType(name="ReferencedType").add_property(
        name=identifying_prop.name, importance=db.OBLIGATORY).insert()
    referencing_type = db.RecordType(name="ReferencingType").add_property(
        name=referenced_type.name, datatype=db.LIST(referenced_type.name)).insert()

    # Define identifiables. ReferencingType by name, ReferencedType by
    # IdentifyingProp and not by name.
    ident = CaosDBIdentifiableAdapter()
    ident.register_identifiable(referenced_type.name, db.RecordType().add_parent(
        name=referenced_type.name).add_property(name=identifying_prop.name))
    ident.register_identifiable(referencing_type.name, db.RecordType().add_parent(
        name=referencing_type.name).add_property(name="name"))

    crawler = Crawler(identifiableAdapter=ident)

    ref_target1 = db.Record(name="RefTarget").add_parent(
        name=referenced_type.name).add_property(name=identifying_prop.name, value=1)
    ref_target2 = db.Record(name="RefTarget").add_parent(
        name=referenced_type.name).add_property(name=identifying_prop.name, value=2)

    referencing1 = db.Record(name="Referencing1").add_parent(
        name=referencing_type.name).add_property(name=referenced_type.name, value=[ref_target1])
    referencing2 = db.Record(name="Referencing2").add_parent(
        name=referencing_type.name).add_property(name=referenced_type.name, value=[ref_target2])
    referencing3 = db.Record(name="Referencing3").add_parent(
        name=referencing_type.name).add_property(name=referenced_type.name, value=[ref_target1,
                                                                                   ref_target2])

    records = db.Container().extend(
        [ref_target1, ref_target2, referencing1, referencing2, referencing3])

    ins, ups = crawler.synchronize(crawled_data=records, unique_names=False)
    assert len(ins) == len(records)
    assert len(ups) == 0
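
    # Both reference targets share the name "RefTarget"; since the identifiable
    # of ReferencedType only contains IdentifyingProp, they must nevertheless
    # have been inserted as two distinct entities.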
    retrieved_target1 = db.execute_query(
        f"FIND {referenced_type.name} WITH {identifying_prop.name}=1", unique=True)
    retrieved_target2 = db.execute_query(
        f"FIND {referenced_type.name} WITH {identifying_prop.name}=2", unique=True)
    assert retrieved_target2.name == retrieved_target1.name
    assert retrieved_target1.name == ref_target1.name
    assert retrieved_target1.id != retrieved_target2.id

    retrieved_referencing1 = db.execute_query(
        f"FIND {referencing_type.name} WITH name={referencing1.name}", unique=True)
    assert retrieved_referencing1.get_property(referenced_type.name) is not None
    assert retrieved_referencing1.get_property(referenced_type.name).value == [
        retrieved_target1.id]
    assert retrieved_referencing1.get_property(referenced_type.name).value != [
        retrieved_target2.id]

    retrieved_referencing2 = db.execute_query(
        f"FIND {referencing_type.name} WITH name={referencing2.name}", unique=True)
    assert retrieved_referencing2.get_property(referenced_type.name) is not None
    assert retrieved_referencing2.get_property(referenced_type.name).value == [
        retrieved_target2.id]
    assert retrieved_referencing2.get_property(referenced_type.name).value != [
        retrieved_target1.id]

    retrieved_referencing3 = db.execute_query(
        f"FIND {referencing_type.name} WITH name={referencing3.name}", unique=True)
    assert retrieved_referencing3.get_property(referenced_type.name) is not None
    assert len(retrieved_referencing3.get_property(referenced_type.name).value) == 2
    assert retrieved_target1.id in retrieved_referencing3.get_property(referenced_type.name).value
    assert retrieved_target2.id in retrieved_referencing3.get_property(referenced_type.name).value


def test_indiscale_113(clear_database):
    """Somewhat mysterious failures to resolve references in
    split_into_inserts_and_updates, see
    https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/113
    """
    # Create and insert minimal datamodel
    datamodel_str = """
Event:
  recommended_properties:
    Basis:
    Campaign:
Basis:
Campaign:
  recommended_properties:
    Basis:
"""
    model = parse_model_from_string(datamodel_str)
    model.sync_data_model(noquestion=True)

    # Register identifiables, everything is identified by name
    ident = CaosDBIdentifiableAdapter()
    ident.register_identifiable("Event", db.RecordType().add_parent(
        name="Event").add_property(name="name"))
    ident.register_identifiable("Basis", db.RecordType().add_parent(
        name="Basis").add_property(name="name"))
    ident.register_identifiable("Campaign", db.RecordType().add_parent(
        name="Campaign").add_property(name="name"))

    crawler = Crawler(identifiableAdapter=ident)

    # Add records: event references basis and campaign, campaign references
    # basis.
    basis = db.Record(name="Poseidon").add_parent(name="Basis")
    campaign = db.Record(name="POS386").add_parent(
        name="Campaign").add_property(name="Basis", value=basis)
    event = db.Record(name="GeoB13952").add_parent(name="Event")
    event.add_property(name="Basis", value=basis)
    event.add_property(name="Campaign", value=campaign)

    # basis and campaign already exist in the db
    db.Container().extend([basis, campaign]).insert()

    # redefine to trigger resolving
    basis = db.Record(name="Poseidon").add_parent(name="Basis")
    campaign = db.Record(name="POS386").add_parent(
        name="Campaign").add_property(name="Basis", value=basis)
    recs = [event, basis, campaign]

    ins, ups = crawler.synchronize(crawled_data=recs, unique_names=False)
    # There is only one event to be inserted
    assert len(ins) == 1
    # Nothing to do for the existing entities
    assert len(ups) == 0
    assert ins[0].name == event.name


def test_indiscale_87(clear_database):
    """Handle long string queries gracefully.
    https://gitlab.com/linkahead/linkahead-crawler/-/issues/87
    """
    prop = db.Property(name="str", datatype=db.TEXT).insert()
    rt = db.RecordType(name="RT1").add_property(prop).insert()
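
    # Each of the following values is 260 characters long, so the queries that
    # the identifiable adapter builds from them become very long strings.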
    strings = [
        "X123456789" * 26,
        "X" * 260,
        "X123456789" * 25 + "9876543210",
    ]
    recs = [
        db.Record().add_parent(rt).add_property(name="str", value=string).insert()
        for string in strings
    ]
    idents = [
        Identifiable(record_type="RT1", properties={"str": string})
        for string in strings
    ]
    adapter = CaosDBIdentifiableAdapter()
    for rec, ident in zip(recs, idents):
        print(f"Testing: ...{rec.get_property('str').value[-10:]}")
        retrieved = adapter.retrieve_identified_record_for_identifiable(ident)
        # print(rec)
        # print(retrieved)
        print(db.apiutils.compare_entities(rec, retrieved))
        assert db.apiutils.empty_diff(rec, retrieved)
        print("---")

    # add another, harmless, property
    prop2 = db.Property(name="someint", datatype=db.INTEGER).insert()
    rt.add_property(prop2).update()
    string = "Y123456789" * 26
    numbers = [23, 42]
    recs = [
        db.Record().add_parent(rt).add_property(name="str", value=string).add_property(
            name="someint", value=number).insert()
        for number in numbers
    ]
    idents = [Identifiable(record_type="RT1", properties={"str": string})]

    # Ambiguous result
    with raises(RuntimeError, match=".*unambiguously.*"):
        retrieved = adapter.retrieve_identified_record_for_identifiable(idents[0])

    # Upgrade new property to be identifying
    idents = [
        Identifiable(record_type="RT1", properties={"str": string, "someint": number})
        for number in numbers
    ]
    for rec, ident in zip(recs, idents):
        print(f"Testing: someint={rec.get_property('someint').value}")
        retrieved = adapter.retrieve_identified_record_for_identifiable(ident)
        # print(rec)
        # print(retrieved)
        print(db.apiutils.compare_entities(rec, retrieved))
        assert db.apiutils.empty_diff(rec, retrieved)
        print("---")


def test_issue_14(clear_database):
    """
    Issue title: Some parent updates are required before inserts
    https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/14
    """
    rt1 = db.RecordType(name="RT1")
    rt2 = db.RecordType(name="RT2").insert()
    rt1.add_property(rt2, importance=db.OBLIGATORY)
    rt1.insert()

    r = db.Record()
    r.add_parent(rt1)
    with tempfile.NamedTemporaryFile() as tmpf:
        f = db.File(name="test_parent", path="parent_test/file.txt", file=tmpf.name)
        f.insert()

        # We create a clean new file object here:
        f2 = db.File(name="test_parent", path="parent_test/file.txt", file=tmpf.name)
        f2.add_parent(rt2)
        r.add_property(name="RT2", value=f2)

        # Current state in the database: File without parents
        f_test_base = db.File(name="test_parent").retrieve()
        assert len(f_test_base.parents) == 0
        assert len(db.execute_query("FIND Record")) == 0

        ident = CaosDBIdentifiableAdapter()
        ident.register_identifiable("RT1", db.RecordType().add_parent(
            name="RT1").add_property(name="RT2"))

        crawler = Crawler(identifiableAdapter=ident)
        crawler.synchronize(crawled_data=[f2, r])
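
        # After synchronization the file must have received the RT2 parent
        # (the parent update has to happen before the record insert, see the
        # issue title), and the new record must reference the file.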
        f_test = db.File(name="test_parent").retrieve()
        assert len(f_test.parents) == 1
        assert f_test.parents[0].name == "RT2"
        records = db.execute_query("FIND Record")
        assert len(records) == 1
        assert records[0].get_property("RT2").value == f_test.id