Skip to content
Snippets Groups Projects
Commit 5319a73a authored by florian's avatar florian
Browse files

Merge branch 'dev' into f-fix-resolve-references-with-cached

parents 074fecad 2aa524bf
No related branches found
No related tags found
3 merge requests!160STY: styling,!141FIX: Resolve referneces to existing entities correctly,!140New f fix merge
Pipeline #44276 failed
# CaosDB-Crawler
## Welcome
This is the repository of the CaosDB-Crawler, a tool for automatic data
insertion into [CaosDB](https://gitlab.com/caosdb/caosdb-meta).
This is the repository of the LinkAhead Crawler, a tool for automatic data
insertion into [LinkAhead](https://gitlab.com/linkahead/linkahead).
This is a new implementation resolving problems of the original implementation
in [caosdb-advancedtools](https://gitlab.com/caosdb/caosdb-advanced-user-tools)
in [LinkAhead Python Advanced User Tools](https://gitlab.com/caosdb/caosdb-advanced-user-tools)
## Setup
......@@ -16,20 +15,23 @@ setup this code.
## Further Reading
Please refer to the [official documentation](https://docs.indiscale.com/caosdb-crawler/) of the CaosDB-Crawler for more information.
Please refer to the [official documentation](https://docs.indiscale.com/caosdb-crawler/) of the LinkAhead Crawler for more information.
## Contributing
Thank you very much to all contributors—[past, present](https://gitlab.com/caosdb/caosdb/-/blob/dev/HUMANS.md), and prospective ones.
Thank you very much to all contributors—[past,
present](https://gitlab.com/linkahead/linkahead/-/blob/main/HUMANS.md), and prospective
ones.
### Code of Conduct
By participating, you are expected to uphold our [Code of Conduct](https://gitlab.com/caosdb/caosdb/-/blob/dev/CODE_OF_CONDUCT.md).
By participating, you are expected to uphold our [Code of
Conduct](https://gitlab.com/linkahead/linkahead/-/blob/main/CODE_OF_CONDUCT.md).
### How to Contribute
* You found a bug, have a question, or want to request a feature? Please
[create an issue](https://gitlab.com/caosdb/caosdb-crawler).
[create an issue](https://gitlab.com/linkahead/linkahead-crawler/-/issues).
* You want to contribute code?
* **Forking:** Please fork the repository and create a merge request in GitLab and choose this repository as
target. Make sure to select "Allow commits from members who can merge the target branch" under
......@@ -38,9 +40,8 @@ By participating, you are expected to uphold our [Code of Conduct](https://gitla
* **Code style:** This project adheres to the PEP8 recommendations; you can test your code style
using the `autopep8` tool (`autopep8 -i -r ./`). Please write your doc strings following the
[NumpyDoc](https://numpydoc.readthedocs.io/en/latest/format.html) conventions.
* You can also contact us at **info (AT) caosdb.de** and join the
CaosDB community on
[#caosdb:matrix.org](https://matrix.to/#/!unwwlTfOznjEnMMXxf:matrix.org).
* You can also join the LinkAhead community on
[#linkahead:matrix.org](https://matrix.to/#/!unwwlTfOznjEnMMXxf:matrix.org).
The file `unittests/records.xml` serves as a dummy for a server state with files.
......
......@@ -114,7 +114,7 @@ def test_issue_23(clear_database):
assert rec_crawled.get_property("identifying_prop").value == "identifier"
assert rec_crawled.get_property("prop_b") is not None
assert rec_crawled.get_property("prop_b").value == "something_else"
# no interaction with the database yet, so the rrecord shouldn't have a prop_a yet
# no interaction with the database yet, so the record shouldn't have a prop_a yet
assert rec_crawled.get_property("prop_a") is None
# synchronize with database and update the record
......@@ -133,3 +133,78 @@ def test_issue_23(clear_database):
"identifying_prop").value == rec_crawled.get_property("identifying_prop").value
assert rec_retrieved.get_property(
"prop_b").value == rec_crawled.get_property("prop_b").value
def test_issue_83(clear_database):
    """Regression test for
    https://gitlab.com/linkahead/linkahead-crawler/-/issues/83.

    Referenced entities may share the same name as long as the name is not
    part of their identifiable.
    """
    # --- Minimal data model -------------------------------------------------
    identifying_prop = db.Property(name="IdentifyingProp", datatype=db.INTEGER)
    identifying_prop = identifying_prop.insert()

    referenced_type = db.RecordType(name="ReferencedType")
    referenced_type = referenced_type.add_property(
        name=identifying_prop.name, importance=db.OBLIGATORY)
    referenced_type = referenced_type.insert()

    referencing_type = db.RecordType(name="ReferencingType")
    referencing_type = referencing_type.add_property(
        name=referenced_type.name, datatype=db.LIST(referenced_type.name))
    referencing_type = referencing_type.insert()

    # --- Identifiables ------------------------------------------------------
    # ReferencedType is identified by IdentifyingProp only (NOT by name),
    # whereas ReferencingType is identified by its name.
    adapter = CaosDBIdentifiableAdapter()

    referenced_identifiable = db.RecordType().add_parent(name=referenced_type.name)
    referenced_identifiable = referenced_identifiable.add_property(
        name=identifying_prop.name)
    adapter.register_identifiable(referenced_type.name, referenced_identifiable)

    referencing_identifiable = db.RecordType().add_parent(name=referencing_type.name)
    referencing_identifiable = referencing_identifiable.add_property(name="name")
    adapter.register_identifiable(referencing_type.name, referencing_identifiable)

    crawler = Crawler(identifiableAdapter=adapter)

    # --- Records to be synchronized ------------------------------------------
    def _make_target(value):
        # Every target gets the same name; only the identifying property
        # value tells them apart.
        target = db.Record(name="RefTarget")
        target = target.add_parent(name=referenced_type.name)
        return target.add_property(name=identifying_prop.name, value=value)

    def _make_referencing(record_name, targets):
        # A record of the referencing type that points to the given targets.
        record = db.Record(name=record_name)
        record = record.add_parent(name=referencing_type.name)
        return record.add_property(name=referenced_type.name, value=targets)

    target_one = _make_target(1)
    target_two = _make_target(2)
    referencing1 = _make_referencing("Referencing1", [target_one])
    referencing2 = _make_referencing("Referencing2", [target_two])
    referencing3 = _make_referencing("Referencing3", [target_one, target_two])

    records = db.Container().extend(
        [target_one, target_two, referencing1, referencing2, referencing3])

    inserted, updated = crawler.synchronize(crawled_data=records, unique_names=False)

    # Everything is new, so all records are inserted and none is updated.
    assert len(inserted) == len(records)
    assert len(updated) == 0

    # --- The two targets exist separately despite sharing a name ------------
    found_target1 = db.execute_query(
        f"FIND {referenced_type.name} WITH {identifying_prop.name}=1", unique=True)
    found_target2 = db.execute_query(
        f"FIND {referenced_type.name} WITH {identifying_prop.name}=2", unique=True)
    assert found_target2.name == found_target1.name
    assert found_target1.name == target_one.name
    assert found_target1.id != found_target2.id

    # --- Referencing1 points to target 1 only -------------------------------
    found_referencing1 = db.execute_query(
        f"FIND {referencing_type.name} WITH name={referencing1.name}", unique=True)
    refs1 = found_referencing1.get_property(referenced_type.name)
    assert refs1 is not None
    assert refs1.value == [found_target1.id]
    assert refs1.value != [found_target2.id]

    # --- Referencing2 points to target 2 only -------------------------------
    found_referencing2 = db.execute_query(
        f"FIND {referencing_type.name} WITH name={referencing2.name}", unique=True)
    refs2 = found_referencing2.get_property(referenced_type.name)
    assert refs2 is not None
    assert refs2.value == [found_target2.id]
    assert refs2.value != [found_target1.id]

    # --- Referencing3 points to both targets --------------------------------
    found_referencing3 = db.execute_query(
        f"FIND {referencing_type.name} WITH name={referencing3.name}", unique=True)
    refs3 = found_referencing3.get_property(referenced_type.name)
    assert refs3 is not None
    assert len(refs3.value) == 2
    assert found_target1.id in refs3.value
    assert found_target2.id in refs3.value
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment