Commit 6cf10a47 authored by Henrik tom Wörden

Merge branch 'f-merge-conflict-id' into 'dev'

Fix merge conflict in split_into_inserts_and_updates

See merge request !62
parents 6c746f86 7d0bccef
Merge requests: !91 Release 0.3, !62 Fix merge conflict in split_into_inserts_and_updates
Pipeline #30991 passed
@@ -8,11 +8,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ##
### Added ###
- Identifiable class to represent the information used to identify Records.
- Added some StructureElements: BooleanElement, FloatElement, IntegerElement,
  ListElement, DictElement

### Changed ###
- Some StructureElements changed (see "How to upgrade" in the docs):
  - Dict, DictElement and DictDictElement were merged into DictElement.
  - DictTextElement and TextElement were merged into TextElement. The "match"
@@ -27,6 +29,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed ###
- [#39](https://gitlab.com/caosdb/caosdb-crawler/-/issues/39) Merge conflicts in
  `split_into_inserts_and_updates` when cached entity references a record
  without id
### Security ###
### Documentation ###
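
The new and merged StructureElement classes listed above live in `caoscrawler.structure_elements` (the module the test further down imports DictElement from). The following is an editor's sketch of how they might be constructed, not code from this commit; the `(name, value)` constructor signature is an assumption, not confirmed by the diff.

# Editor's sketch; the (name, value) signature is an assumption.
from caoscrawler.structure_elements import (BooleanElement, DictElement,
                                            FloatElement, IntegerElement,
                                            ListElement, TextElement)

d = DictElement(name="measurement", value={"title": "run 1", "count": 3})
t = TextElement(name="title", value="run 1")
i = IntegerElement(name="count", value=3)
f = FloatElement(name="temperature", value=21.5)
b = BooleanElement(name="valid", value=True)
tags = ListElement(name="tags", value=["a", "b"])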
@@ -20,7 +20,7 @@ packages = find:
python_requires = >=3.8
install_requires =
    importlib-resources
    caosdb > 0.9.0
    caosdb > 0.10.0
    caosadvancedtools >= 0.6.0
    yaml-header-tools >= 0.2.1
    pyyaml
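
The floor on the caosdb client is raised from > 0.9.0 to > 0.10.0, presumably because `EntityMergeConflictError`, which the crawler now imports from `caosdb.apiutils`, is only available in newer pylib releases; that link is an editor's inference, not stated in the diff. A minimal, hedged guard that makes the requirement visible at import time could look like:

# Editor's sketch, not part of this commit: fail early with a clear message
# if the installed caosdb client is too old to provide the exception class.
try:
    from caosdb.apiutils import EntityMergeConflictError
except ImportError as exc:
    raise RuntimeError("caosdb > 0.10.0 is required for merge-conflict handling") from exc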
@@ -29,36 +29,41 @@ the acquired data with CaosDB.
"""
from __future__ import annotations
import argparse
import importlib
from caosadvancedtools.cache import UpdateCache, Cache
import uuid
import sys
import logging
import os
import sys
import uuid
import warnings
import yaml
from argparse import RawTextHelpFormatter
from collections import defaultdict
from copy import deepcopy
from enum import Enum
import logging
from importlib_resources import files
import argparse
from argparse import RawTextHelpFormatter
from jsonschema import validate
from typing import Any, Optional, Type, Union
import caosdb as db
from caosadvancedtools.cache import UpdateCache, Cache
from caosadvancedtools.crawler import Crawler as OldCrawler
import warnings
from caosdb.apiutils import (compare_entities, EntityMergeConflictError,
                             merge_entities)
from caosdb.common.datatype import is_reference
from .stores import GeneralStore, RecordStore
from .identified_cache import IdentifiedCache
from .identifiable import Identifiable
from .structure_elements import StructureElement, Directory
from .converters import Converter, DirectoryConverter
from .identifiable import Identifiable
from .identifiable_adapters import (IdentifiableAdapter,
                                    LocalStorageIdentifiableAdapter,
                                    CaosDBIdentifiableAdapter)
from collections import defaultdict
from typing import Any, Optional, Type, Union
from caosdb.apiutils import compare_entities, merge_entities
from copy import deepcopy
from jsonschema import validate
from .identified_cache import IdentifiedCache
from .macros import defmacro_constructor, macro_constructor
from .stores import GeneralStore, RecordStore
from .structure_elements import StructureElement, Directory
logger = logging.getLogger(__name__)
@@ -759,7 +764,10 @@ class Crawler(object):
                elif self.get_from_any_cache(identifiable) is not None:
                    # We merge the two in order to prevent loss of information
                    newrecord = self.get_from_any_cache(identifiable)
                    try:
                        merge_entities(newrecord, record)
                    except EntityMergeConflictError:
                        continue
                    Crawler.bend_references_to_new_object(
                        old=record, new=newrecord, entities=flat + to_be_updated + to_be_inserted)
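
The change above wraps `merge_entities` in a try/except so that a cached record whose properties conflict with the freshly crawled one is skipped (`continue`) instead of aborting `split_into_inserts_and_updates`. Below is a standalone sketch of that pattern, mirroring the issue #39 scenario; it is an editor's illustration with made-up names and ids, not code from this commit.

import caosdb as db
from caosdb.apiutils import EntityMergeConflictError, merge_entities

# cached record: has an id and references a record WITHOUT an id
cached = db.Record(name="A", id=101).add_parent(name="RT_A")
cached.add_property(name="RT_B", value=db.Record(name="B"))

# crawled record: same identifiable, but references a record WITH an id
crawled = db.Record(name="A").add_parent(name="RT_A")
crawled.add_property(name="RT_B", value=db.Record(name="B", id=102))

try:
    # merge the crawled record into the cached one to keep all information
    merge_entities(cached, crawled)
except EntityMergeConflictError:
    # the conflicting RT_B values cannot be merged automatically;
    # the crawler now skips such a record instead of crashing
    pass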
@@ -997,7 +1005,8 @@ class Crawler(object):
        if self.identifiableAdapter is None:
            raise RuntimeError("Should not happen.")
        to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(crawled_data)
        to_be_inserted, to_be_updated = self.split_into_inserts_and_updates(
            crawled_data)
        # TODO: refactoring of typo
        for el in to_be_updated:
@@ -22,15 +22,14 @@
from pytest import mark
import caosdb as db
from caoscrawler.crawl import Crawler
from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
from caoscrawler.structure_elements import DictElement
from test_tool import rfp


@mark.xfail(
    reason="Wait until value conversion in dicts is fixed, see "
    "https://gitlab.com/caosdb/caosdb-crawler/-/issues/10."
)
def test_issue_10():
    """Test integer-to-float conversion in dictionaries"""
    crawler_definition = {
@@ -68,3 +67,46 @@ def test_issue_10():
    assert records[0].parents[0].name == "TestRec"
    assert records[0].get_property("float_prop") is not None
    assert float(records[0].get_property("float_prop").value) == 4.0


def test_issue_39():
    """Test for merge conflicts in
    `crawl.Crawler.split_into_inserts_and_updates` (see
    https://gitlab.com/caosdb/caosdb-crawler/-/issues/39).
    """
    crawler = Crawler(debug=True)

    # For trying and failing to retrieve remotely identified records
    def _fake_retrieve(*args, **kwargs):
        return None

    ident = CaosDBIdentifiableAdapter()
    # identifiable property is just name for both Record Types
    ident.register_identifiable("RT_A", db.RecordType().add_parent(
        name="RT_A").add_property(name="name"))
    ident.register_identifiable("RT_B", db.RecordType().add_parent(
        name="RT_B").add_property(name="name"))
    # overwrite retrieve
    ident.retrieve_identified_record_for_identifiable = _fake_retrieve
    crawler.identifiableAdapter = ident

    # a1 (has id) references b1 (has no id)
    a1 = db.Record(name="A", id=101).add_parent(name="RT_A")
    b1 = db.Record(name="B").add_parent(name="RT_B")
    a1.add_property(name="RT_B", value=b1)
    # a2 (no id) references b2 (has id)
    a2 = db.Record(name="A").add_parent(name="RT_A")
    b2 = db.Record(name="B", id=102).add_parent(name="RT_B")
    a2.add_property(name="RT_B", value=b2)

    flat_list = [b1, a1, a2, b2]

    # the two records with ids exist remotely
    crawler.add_to_remote_existing_cache(a1)
    crawler.add_to_remote_existing_cache(b2)

    # this would result in a merge conflict before
    ins, ups = crawler.split_into_inserts_and_updates(flat_list)
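    # Hedged expectation (editor's addition, not in the original test): before
    # this commit the call above raised an unhandled EntityMergeConflictError;
    # with the fix it should complete and return the insert and update lists.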