Skip to content
Snippets Groups Projects
Verified Commit 3c44ccfd authored by Daniel Hornung's avatar Daniel Hornung
Browse files

FIX Mitigation of server problem with long strings as POV values.

parent 8d3334ac
No related branches found
No related tags found
2 merge requests!160STY: styling,!155Mitigation of server problem with long strings as POV values
Pipeline #47647 passed with warnings
......@@ -64,6 +64,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* A corner case in `split_into_inserts_and_updates` whereby two records created
in different places in the cfood definition would not be merged if both were
identified by the same LinkAhead id
* [#87](https://gitlab.com/linkahead/linkahead-crawler/-/issues/87) Handle long strings more gracefully. The crawler sometimes runs into
[linkahead-server#101](https://gitlab.com/linkahead/linkahead-server/-/issues/101), this is now mitigated.
### Security ###
......
......@@ -112,11 +112,11 @@ def crawler_extended(ident):
return cr, crawled_data, debug_tree
def test_ambigious_lookup(clear_database, usemodel, crawler, ident):
def test_ambiguous_lookup(clear_database, usemodel, crawler, ident):
ins, ups = crawler[0].synchronize(crawled_data=crawler[1])
proj = db.execute_query("FIND Project WITH identifier='SpeedOfLight'", unique=True)
with pytest.raises(RuntimeError, match=".*unambigiously.*"):
with pytest.raises(RuntimeError, match=".*unambiguously.*"):
print(crawler[0].identifiableAdapter.retrieve_identified_record_for_identifiable(
Identifiable(properties={'project': proj.id})))
......
......@@ -16,7 +16,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
from pytest import fixture, mark
from pytest import fixture, mark, raises
import linkahead as db
from linkahead.cached import cache_clear
......@@ -279,9 +279,9 @@ def test_indiscale_87(clear_database):
prop = db.Property(name="str", datatype=db.TEXT).insert()
rt = db.RecordType(name="RT1").add_property(prop).insert()
strings = [
"0123456789" * 26,
"0" * 260,
"0123456789" * 25 + "9876543210",
"X123456789" * 26,
"X" * 260,
"X123456789" * 25 + "9876543210",
]
recs = [
db.Record().add_parent(rt).add_property(name="str", value=string).insert()
......@@ -293,4 +293,39 @@ def test_indiscale_87(clear_database):
]
adapter = CaosDBIdentifiableAdapter()
for rec, ident in zip(recs, idents):
assert adapter.retrieve_identified_record_for_identifiable(ident) == rec
print(f"Testing: ...{rec.get_property('str').value[-10:]}")
retrieved = adapter.retrieve_identified_record_for_identifiable(ident)
# print(rec)
# print(retrieved)
print(db.apiutils.compare_entities(rec, retrieved))
assert db.apiutils.empty_diff(rec, retrieved)
print("---")
# add another, harmless, property
prop2 = db.Property(name="someint", datatype=db.INTEGER).insert()
rt.add_property(prop2).update()
string = "Y123456789" * 26
numbers = [23, 42]
recs = [
db.Record().add_parent(rt).add_property(name="str", value=string).add_property(
name="someint", value=number).insert()
for number in numbers
]
idents = [Identifiable(record_type="RT1", properties={"str": string})]
# Ambiguous result
with raises(RuntimeError, match=".*unambiguously.*"):
retrieved = adapter.retrieve_identified_record_for_identifiable(idents[0])
# Upgrade new property to be identifying
idents = [
Identifiable(record_type="RT1", properties={"str": string, "someint": number})
for number in numbers
]
for rec, ident in zip(recs, idents):
print(f"Testing: someint={rec.get_property('someint').value}")
retrieved = adapter.retrieve_identified_record_for_identifiable(ident)
# print(rec)
# print(retrieved)
print(db.apiutils.compare_entities(rec, retrieved))
assert db.apiutils.empty_diff(rec, retrieved)
print("---")
......@@ -106,13 +106,16 @@ identifiabel, identifiable and identified record) for a Record.
"""
@staticmethod
def create_query_for_identifiable(ident: Identifiable):
def create_query_for_identifiable(ident: Identifiable, startswith: bool = False):
"""
This function is taken from the old crawler:
caosdb-advanced-user-tools/src/caosadvancedtools/crawler.py
uses the properties of ident to create a query that can determine
whether the required record already exists.
If ``startswith`` is True, use ``LIKE`` for long string values to test if the strings starts
with the first 200 characters of the value.
"""
query_string = "FIND RECORD "
......@@ -132,7 +135,9 @@ identifiabel, identifiable and identified record) for a Record.
if len(ident.properties) > 0:
query_string += " AND "
query_string += IdentifiableAdapter.create_property_query(ident)
query_string += IdentifiableAdapter.create_property_query(ident, startswith=startswith)
# TODO Can these cases happen at all with the current code?
if query_string.endswith(" AND WITH "):
query_string = query_string[:-len(" AND WITH ")]
if query_string.endswith(" AND "):
......@@ -140,15 +145,41 @@ identifiabel, identifiable and identified record) for a Record.
return query_string
@staticmethod
def create_property_query(entity: Identifiable):
def __create_pov_snippet(pname: str, pvalue, startswith: bool = False):
"""Return something like ``'name'='some value'`` or ``'name' LIKE 'some*'``.
If ``startswith`` is True, the value of strings will be cut off at 200 characters and a ``LIKE``
operator will be used to find entities matching at the beginning.
"""
if startswith and isinstance(pvalue, str) and len(pvalue) > 200:
operator_value_str = f" LIKE '{escape_squoted_text(pvalue[:200])}*'"
else:
operator_value_str = "='" + convert_value(pvalue) + "'"
result = "'" + escape_squoted_text(pname) + "'" + operator_value_str
return result
@staticmethod
def create_property_query(entity: Identifiable, startswith: bool = False):
"""Create a POV query part with the entity's properties.
Parameters
----------
entity: Identifiable
The Identifiable whose properties shall be used.
startswith: bool, optional
If True, check string typed properties against the first 200 characters only. Default is False.
"""
query_string = ""
pov = IdentifiableAdapter.__create_pov_snippet # Shortcut
for pname, pvalue in entity.properties.items():
if pvalue is None:
query_string += "'" + escape_squoted_text(pname) + "' IS NULL AND "
elif isinstance(pvalue, list):
for v in pvalue:
query_string += ("'" + escape_squoted_text(pname) + "'='" +
convert_value(v) + "' AND ")
query_string += pov(pname, v, startswith=startswith) + " AND "
# TODO: (for review)
# This code would allow for more complex identifiables with
......@@ -161,8 +192,7 @@ identifiabel, identifiable and identified record) for a Record.
# IdentifiableAdapter.create_property_query(p.value) +
# ") AND ")
else:
query_string += ("'" + escape_squoted_text(pname) + "'='" +
convert_value(pvalue) + "' AND ")
query_string += pov(pname, pvalue, startswith=startswith) + " AND "
# remove the last AND
return query_string[:-4]
......@@ -549,10 +579,28 @@ class CaosDBIdentifiableAdapter(IdentifiableAdapter):
def retrieve_identified_record_for_identifiable(self, identifiable: Identifiable):
query_string = self.create_query_for_identifiable(identifiable)
try:
candidates = cached_query(query_string)
except db.exceptions.HTTPServerError as err:
query_string = self.create_query_for_identifiable(identifiable, startswith=True)
candidates = cached_query(query_string).copy() # Copy against cache poisoning
# Test if the candidates really match all properties
for pname, pvalue in identifiable.properties.items():
popme = []
for i in range(len(candidates)):
this_prop = candidates[i].get_property(pname)
if this_prop is None:
popme.append(i)
continue
if not this_prop.value == pvalue:
popme.append(i)
for i in reversed(popme):
candidates.pop(i)
if len(candidates) > 1:
raise RuntimeError(
f"Identifiable was not defined unambigiously.\n{query_string}\nReturned the "
f"Identifiable was not defined unambiguously.\n{query_string}\nReturned the "
f"following {candidates}."
f"Identifiable:\n{identifiable.record_type}{identifiable.properties}")
if len(candidates) == 0:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment