Select Git revision
test_issues.py
-
Henrik tom Wörden authored
DictXYElements and DictXYElementConverters are now XYElements and XYElementConverters, respectively.
Henrik tom Wörden authored: DictXYElements and DictXYElementConverters are now XYElements and XYElementConverters, respectively.
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
test_issues.py 14.95 KiB
# This file is a part of the LinkAhead Project.
#
# Copyright (C) 2022 Indiscale GmbH <info@indiscale.com>
# 2022 Florian Spreckelsen <f.spreckelsen@indiscale.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
import linkahead as db
from caosadvancedtools.models.parser import parse_model_from_string
from caoscrawler.crawl import Crawler
from caoscrawler.identifiable import Identifiable
from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
from caoscrawler.scanner import (create_converter_registry,
scan_structure_elements)
from caoscrawler.structure_elements import DictElement
from linkahead.cached import cache_clear
from linkahead.utils.register_tests import clear_database, set_test_key
from pytest import fixture, mark, raises
import tempfile
# Set the test key (helper imported from linkahead.utils.register_tests) once,
# at import time, before any test in this module runs.
# NOTE(review): presumably this key has to match a key registered on the target
# server so that the `clear_database` fixture only ever wipes a dedicated test
# instance -- confirm against the linkahead.utils.register_tests documentation.
set_test_key("10b128cf8a1372f30aa3697466bb55e76974e0c16a599bb44ace88f19c8f61e2")
@fixture(autouse=True)
def clear_cache():
    """Reset the LinkAhead cache.

    The fixture is declared with ``autouse=True``, so tests in this module get
    it without having to request it explicitly.
    """
    cache_clear()
def test_issue_23(clear_database):
    """Check that an update keeps existing properties the crawler did not find.

    A record is stored with ``prop_a`` only and then crawled again with
    ``prop_b`` only; after synchronization it must carry both.

    See issue https://gitlab.com/caosdb/caosdb-crawler/-/issues/23
    """
    # Insert a simplistic model and a record of type TestType that has the
    # identifying property and prop_a, but not prop_b.
    rt = db.RecordType(name="TestType")
    rec = (
        db.Record(name="TestRec")
        .add_parent(rt)
        .add_property(name="identifying_prop", value="identifier")
        .add_property(name="prop_a", value="something")
    )
    model_entities = [
        db.Property(name="identifying_prop", datatype=db.TEXT),
        db.Property(name="prop_a", datatype=db.TEXT),
        db.Property(name="prop_b", datatype=db.TEXT),
        rt,
        rec,
    ]
    db.Container().extend(model_entities).insert()

    # The cfood defines a TestType record with identifying_prop and prop_b,
    # but not prop_a.
    identifying_element = {
        "type": "TextElement",
        "match_name": "ident",
        "match_value": "(?P<ident_value>.*)",
        "records": {"TestType": {"identifying_prop": "$ident_value"}},
    }
    other_element = {
        "type": "TextElement",
        "match_name": "prop_b",
        "match_value": "(?P<other_value>.*)",
        "records": {"TestType": {"prop_b": "$other_value"}},
    }
    crawler_definition = {
        "DictTest": {
            "type": "DictElement",
            "match": "(.*)",
            "records": {"TestType": {}},
            "subtree": {
                "identifying_element": identifying_element,
                "other_element": other_element,
            },
        },
    }

    # TestType records are identified by identifying_prop.
    adapter = CaosDBIdentifiableAdapter()
    identifiable_definition = db.RecordType().add_parent(name="TestType")
    identifiable_definition.add_property(name="identifying_prop")
    adapter.register_identifiable("TestType", identifiable_definition)

    crawler = Crawler(identifiableAdapter=adapter)
    converter_registry = create_converter_registry(crawler_definition)

    # Scan the dictionary; this step does not touch the database.
    crawler.generate_run_id()
    root_element = DictElement(
        "TestDict", {"ident": "identifier", "prop_b": "something_else"})
    records = scan_structure_elements(
        root_element, crawler_definition, converter_registry)

    assert len(records) == 1
    crawled = records[0]
    assert crawled.parents[0].name == "TestType"
    expected_crawled_values = (
        ("identifying_prop", "identifier"),
        ("prop_b", "something_else"),
    )
    for prop_name, expected_value in expected_crawled_values:
        crawled_prop = crawled.get_property(prop_name)
        assert crawled_prop is not None
        assert crawled_prop.value == expected_value
    # Nothing came from the database yet, so prop_a cannot be there.
    assert crawled.get_property("prop_a") is None

    # Synchronizing must update the existing record, not insert a new one.
    inserts, updates = crawler.synchronize(crawled_data=records)
    assert len(inserts) == 0
    assert len(updates) == 1

    # The stored record now combines the name and both sets of properties.
    retrieved = db.Record(id=rec.id).retrieve()
    assert retrieved.name == rec.name
    for prop_name in ("identifying_prop", "prop_a"):
        assert (retrieved.get_property(prop_name).value
                == rec.get_property(prop_name).value)
    for prop_name in ("identifying_prop", "prop_b"):
        assert (retrieved.get_property(prop_name).value
                == crawled.get_property(prop_name).value)
def test_issue_83(clear_database):
    """https://gitlab.com/linkahead/linkahead-crawler/-/issues/83.

    Referenced entities may share a name as long as the name is not part of
    their identifiable.
    """
    # Very simple data model: one integer property, a referenced type that
    # requires it, and a referencing type holding a list of references.
    id_prop = db.Property(name="IdentifyingProp", datatype=db.INTEGER).insert()
    target_rt = db.RecordType(name="ReferencedType").add_property(
        name=id_prop.name, importance=db.OBLIGATORY).insert()
    source_rt = db.RecordType(name="ReferencingType").add_property(
        name=target_rt.name, datatype=db.LIST(target_rt.name)).insert()

    # ReferencedType is identified by IdentifyingProp (not by name),
    # ReferencingType by its name.
    adapter = CaosDBIdentifiableAdapter()
    for rt_name, identifying_name in ((target_rt.name, id_prop.name),
                                      (source_rt.name, "name")):
        definition = db.RecordType().add_parent(name=rt_name)
        definition.add_property(name=identifying_name)
        adapter.register_identifiable(rt_name, definition)
    crawler = Crawler(identifiableAdapter=adapter)

    def make_target(value):
        # Both targets deliberately share the name "RefTarget".
        target = db.Record(name="RefTarget").add_parent(name=target_rt.name)
        return target.add_property(name=id_prop.name, value=value)

    def make_referencing(name, targets):
        source = db.Record(name=name).add_parent(name=source_rt.name)
        return source.add_property(name=target_rt.name, value=targets)

    target1 = make_target(1)
    target2 = make_target(2)
    referencing1 = make_referencing("Referencing1", [target1])
    referencing2 = make_referencing("Referencing2", [target2])
    referencing3 = make_referencing("Referencing3", [target1, target2])

    records = db.Container().extend(
        [target1, target2, referencing1, referencing2, referencing3])

    inserts, updates = crawler.synchronize(crawled_data=records, unique_names=False)
    assert len(inserts) == len(records)
    assert len(updates) == 0

    def find_target(value):
        return db.execute_query(
            f"FIND {target_rt.name} WITH {id_prop.name}={value}", unique=True)

    def find_referencing(record):
        return db.execute_query(
            f"FIND {source_rt.name} WITH name={record.name}", unique=True)

    # Two distinct targets exist although they carry the same name.
    found_target1 = find_target(1)
    found_target2 = find_target(2)
    assert found_target2.name == found_target1.name
    assert found_target1.name == target1.name
    assert found_target1.id != found_target2.id

    # Each single-reference record points at its own target only.
    single_reference_cases = (
        (referencing1, found_target1, found_target2),
        (referencing2, found_target2, found_target1),
    )
    for source, expected, unexpected in single_reference_cases:
        reference = find_referencing(source).get_property(target_rt.name)
        assert reference is not None
        assert reference.value == [expected.id]
        assert reference.value != [unexpected.id]

    # The third record references both targets.
    both = find_referencing(referencing3).get_property(target_rt.name)
    assert both is not None
    assert len(both.value) == 2
    assert found_target1.id in both.value
    assert found_target2.id in both.value
def test_indiscale_113(clear_database):
    """Somewhat mysterious failures to resolve references in
    split_into_inserts_and_updates, see
    https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/113

    Scenario: a Basis and a Campaign (which references the Basis) already
    exist in the database. An Event referencing both is then synchronized
    together with freshly created, equivalent Basis and Campaign records.
    Only the Event may be inserted; the existing records must be left alone.
    """
    # Create and insert the minimal data model. The YAML nesting is
    # significant: Event recommends Basis and Campaign, Campaign recommends
    # Basis, and Basis itself has no properties. The text starts at column 0
    # because the string is handed to the parser verbatim.
    datamodel_str = """
Event:
  recommended_properties:
    Basis:
    Campaign:
Basis:
Campaign:
  recommended_properties:
    Basis:
"""
    model = parse_model_from_string(datamodel_str)
    model.sync_data_model(noquestion=True)

    # Register identifiables, everything is identified by name.
    ident = CaosDBIdentifiableAdapter()
    for rt_name in ("Event", "Basis", "Campaign"):
        ident.register_identifiable(rt_name, db.RecordType().add_parent(
            name=rt_name).add_property(name="name"))

    crawler = Crawler(identifiableAdapter=ident)

    # Add records: event references basis and campaign, campaign references
    # basis.
    basis = db.Record(name="Poseidon").add_parent(name="Basis")
    campaign = db.Record(name="POS386").add_parent(
        name="Campaign").add_property(name="Basis", value=basis)
    event = db.Record(name="GeoB13952").add_parent(name="Event")
    event.add_property(name="Basis", value=basis)
    event.add_property(name="Campaign", value=campaign)

    # basis and campaign already exist in the db
    db.Container().extend([basis, campaign]).insert()

    # Redefine basis and campaign as new Python objects to trigger resolving.
    # The event still points at the objects created (and inserted) above.
    basis = db.Record(name="Poseidon").add_parent(name="Basis")
    campaign = db.Record(name="POS386").add_parent(
        name="Campaign").add_property(name="Basis", value=basis)
    recs = [event, basis, campaign]

    ins, ups = crawler.synchronize(crawled_data=recs, unique_names=False)

    # There is only one event to be inserted
    assert len(ins) == 1
    # Nothing to do for the existing ents
    assert len(ups) == 0
    assert ins[0].name == event.name
def test_indiscale_87(clear_database):
    """Handle long string queries gracefully.

    Records whose identifying text property holds 260-character values must
    still be retrievable through their identifiable.

    https://gitlab.com/linkahead/linkahead-crawler/-/issues/87
    """
    adapter = CaosDBIdentifiableAdapter()

    def check_identified(record, identifiable):
        # The record found via the identifiable must equal the inserted one.
        found = adapter.retrieve_identified_record_for_identifiable(identifiable)
        print(db.apiutils.compare_entities(record, found))
        assert db.apiutils.empty_diff(record, found)
        print("---")

    text_prop = db.Property(name="str", datatype=db.TEXT).insert()
    rt = db.RecordType(name="RT1").add_property(text_prop).insert()

    # Three 260-character values.
    long_strings = [
        "X123456789" * 26,
        "X" * 260,
        "X123456789" * 25 + "9876543210",
    ]
    inserted = []
    for text in long_strings:
        new_record = db.Record().add_parent(rt).add_property(name="str", value=text)
        inserted.append(new_record.insert())
    identifiables = [
        Identifiable(record_type="RT1", properties={"str": text})
        for text in long_strings
    ]

    for record, identifiable in zip(inserted, identifiables):
        tail = record.get_property('str').value[-10:]
        print(f"Testing: ...{tail}")
        check_identified(record, identifiable)

    # add another, harmless, property
    int_prop = db.Property(name="someint", datatype=db.INTEGER).insert()
    rt.add_property(int_prop).update()

    shared_text = "Y123456789" * 26
    numbers = [23, 42]
    inserted = []
    for number in numbers:
        new_record = db.Record().add_parent(rt)
        new_record.add_property(name="str", value=shared_text)
        new_record.add_property(name="someint", value=number)
        inserted.append(new_record.insert())

    # Ambiguous result: the text alone now matches both records.
    text_only = Identifiable(record_type="RT1", properties={"str": shared_text})
    with raises(RuntimeError, match=".*unambiguously.*"):
        adapter.retrieve_identified_record_for_identifiable(text_only)

    # Upgrade new property to be identifying
    identifiables = [
        Identifiable(record_type="RT1",
                     properties={"str": shared_text, "someint": number})
        for number in numbers
    ]
    for record, identifiable in zip(inserted, identifiables):
        int_value = record.get_property('someint').value
        print(f"Testing: someint={int_value}")
        check_identified(record, identifiable)
def test_issue_14(clear_database):
    """
    Issue title: Some parent updates are required before inserts

    A File already stored without parents is crawled again with parent RT2,
    together with a new record that references it.

    https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/14
    """
    # RT1 obligatorily references RT2.
    rt2 = db.RecordType(name="RT2").insert()
    rt1 = db.RecordType(name="RT1")
    rt1.add_property(rt2, importance=db.OBLIGATORY)
    rt1.insert()

    record = db.Record().add_parent(rt1)

    # The temporary file is kept alive for the whole test so that every
    # database call involving the File entities can still see it.
    with tempfile.NamedTemporaryFile() as tmpf:
        file_kwargs = {
            "name": "test_parent",
            "path": "parent_test/file.txt",
            "file": tmpf.name,
        }
        db.File(**file_kwargs).insert()

        # A clean new file object, this time with RT2 as parent.
        crawled_file = db.File(**file_kwargs)
        crawled_file.add_parent(rt2)
        record.add_property(name="RT2", value=crawled_file)

        # Current state in the database: File without parents, no records.
        stored_before = db.File(name="test_parent").retrieve()
        assert len(stored_before.parents) == 0
        assert len(db.execute_query("FIND Record")) == 0

        # RT1 records are identified by their RT2 reference.
        adapter = CaosDBIdentifiableAdapter()
        rt1_identifiable = db.RecordType().add_parent(name="RT1")
        rt1_identifiable.add_property(name="RT2")
        adapter.register_identifiable("RT1", rt1_identifiable)

        crawler = Crawler(identifiableAdapter=adapter)
        crawler.synchronize(crawled_data=[crawled_file, record])

        # The file gained its parent, and the new record references it.
        stored_after = db.File(name="test_parent").retrieve()
        assert len(stored_after.parents) == 1
        assert stored_after.parents[0].name == "RT2"

        found_records = db.execute_query("FIND Record")
        assert len(found_records) == 1
        assert found_records[0].get_property("RT2").value == stored_after.id