Commit f0ebf631 authored by Henrik tom Wörden

Merge branch 'f-identifiable-test' into 'dev'

F identifiable test

See merge request !20
parents 666defe4 3f7daf8a
Pipeline #23206 passed
@@ -318,6 +318,13 @@ Data:
         Dataset:
           Project: $Project
       subtree:
+        name_element:
+          type: DictTextElement
+          match_name: "name"
+          match_value: "(?P<name>.*)"
+          records:
+            Project:
+              name: $name
         full_name_element:
           type: DictTextElement
          match_name: "full_name"
(new file in this commit; all lines added. From its content and from identifiable_path in the test below, this is presumably the tests' identifiables.yml.)

+license:
+- name
+project_type:
+- name
+Keyword:
+- name
+Taxon:
+- name
+Person:
+- email
+# - full_name
+Dataset:
+- title
+# - DOI
+Event:
+- longitude
+- latitude
+- start_datetime
+Dataspace:
+- dataspace_id
+Project:
+- name
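Each top-level key names a RecordType, and the listed properties form its identifiable, i.e. the properties by which the crawler decides whether a crawled record already exists (commented entries are candidates that are deliberately unused). The new test loads this file with load_from_yaml_definition, while test_dataset registers a single identifiable programmatically; a minimal sketch of both, with the bare file name standing in for the real path:

    import caosdb as db
    from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter

    ident = CaosDBIdentifiableAdapter()
    # Register all identifiables (Person, Dataset, Event, ...) from the YAML
    # definition shown above:
    ident.load_from_yaml_definition("identifiables.yml")
    # ... or register a single identifiable by hand, as test_dataset does:
    ident.register_identifiable(
        "Person",
        db.RecordType().add_parent("Person").add_property("full_name"))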
@@ -29,7 +29,7 @@ import os
 import caosdb as db
-from caoscrawler.crawl import Crawler
+from caoscrawler.crawl import Crawler, crawler_main
 from caoscrawler.converters import JSONFileConverter, DictConverter
 from caoscrawler.identifiable_adapters import CaosDBIdentifiableAdapter
 from caoscrawler.structure_elements import File, JSONFile, Directory
@@ -84,7 +84,6 @@ def clear_database():
 def test_dataset(
         clear_database,
         usemodel):
-    # json_file_path = rfp("test_directories", "single_file_test_data", "testjson.json")
     ident = CaosDBIdentifiableAdapter()
     ident.register_identifiable(
@@ -94,7 +93,7 @@ def test_dataset(
     ident.register_identifiable("Person", db.RecordType(
     ).add_parent("Person").add_property("full_name"))
-    crawler = Crawler(debug=True, identifiableAdapter=ident)
+    crawler = Crawler(identifiableAdapter=ident)
     crawler_definition = crawler.load_definition(
         os.path.join(DATADIR, "dataset_cfoods.yml"))
     # print(json.dumps(crawler_definition, indent=3))
@@ -107,13 +106,7 @@ def test_dataset(
         crawler_definition,
         converter_registry
     )
-    subd = crawler.debug_tree
-    subc = crawler.debug_metadata
-    # print(json.dumps(subc, indent=3))
-    # print(subd)
-    # print(subc)
-    # print(records)
-    ins, ups = crawler.synchronize()
+    crawler.synchronize()
     dataspace = db.execute_query("FIND RECORD Dataspace WITH name=35 AND dataspace_id=20002 AND "
                                  "archived=FALSE AND url='https://datacloud.de/index.php/f/7679'"
@@ -130,3 +123,85 @@ def test_dataset(
                             "") == 1
     assert db.execute_query(f"COUNT RECORD with id={dataset.id} AND WHICH REFERENCES Event WITH "
                             "start_datetime='2022-02-10T16:36:48+01:00'") == 1
+
+
+@pytest.mark.xfail(
+    reason="Reference properties are not updated correctly. "
+    "See https://gitlab.indiscale.com/caosdb/src/caosdb-crawler/-/issues/12."
+)
+def test_event_update(clear_database, usemodel):
+
+    identifiable_path = os.path.join(DATADIR, "identifiables.yml")
+    crawler_definition_path = os.path.join(DATADIR, "dataset_cfoods.yml")
+
+    # TODO(fspreck): Use crawler_main
+    crawler_main(
+        os.path.join(DATADIR, 'data'),
+        crawler_definition_path,
+        identifiable_path,
+        True,
+        os.path.join(DATADIR, "provenance.yml"),
+        False,
+        True,
+        ""
+    )
+
+    old_dataset_rec = db.execute_query(
+        "FIND RECORD Dataset WHICH HAS AN EVENT WITH location='Bremen, Germany'")
+    assert len(old_dataset_rec) == 1
+    old_dataset_rec = old_dataset_rec[0]
+    assert old_dataset_rec.get_property("Event").datatype == db.LIST("Event")
+    assert len(old_dataset_rec.get_property("Event").value) == 1
+    old_event_rec = db.Record(
+        id=old_dataset_rec.get_property("Event").value[0]).retrieve()
+
+    # TODO(fspreck): crawl again manually, edit the event records in the update
+    # list, synchronize, and test whether the events have been updated.
+    ident = CaosDBIdentifiableAdapter()
+    ident.load_from_yaml_definition(identifiable_path)
+
+    second_crawler = Crawler(identifiableAdapter=ident)
+    crawler_definition = second_crawler.load_definition(
+        crawler_definition_path)
+    converter_registry = second_crawler.load_converters(crawler_definition)
+    records = second_crawler.start_crawling(
+        Directory("data", os.path.join(DATADIR, "data")),
+        crawler_definition,
+        converter_registry
+    )
+
+    for rec in records:
+        if rec.parents[0].name == "Event":
+            rec.get_property("longitude").value = 0.0
+            rec.get_property("latitude").value = 0.0
+            rec.get_property("location").value = "Origin"
+        elif rec.parents[0].name == "Dataset":
+            rec.get_property("Event").value[0].get_property(
+                "longitude").value = 0.0
+            rec.get_property("Event").value[0].get_property(
+                "latitude").value = 0.0
+            rec.get_property("Event").value[0].get_property(
+                "location").value = "Origin"
+    second_crawler.synchronize()
+
+    # Dataset is still the same Record, but with an updated event
+    new_dataset_rec = db.Record(id=old_dataset_rec.id).retrieve()
+    for prop in old_dataset_rec.get_properties():
+        if not prop.name == "Event":
+            assert new_dataset_rec.get_property(
+                prop.name).datatype == prop.datatype
+            assert new_dataset_rec.get_property(
+                prop.name).value == prop.value
+    assert new_dataset_rec.get_property("Event").datatype == db.LIST("Event")
+    assert new_dataset_rec.get_property("Event").value is not None
+    assert len(new_dataset_rec.get_property("Event").value) == 1
+    assert new_dataset_rec.get_property("Event").value[0] != old_event_rec.id
+
+    # The event has new properties
+    new_event_rec = db.Record(
+        id=new_dataset_rec.get_property("Event").value[0]).retrieve()
+    assert new_event_rec.get_property("longitude").value == 0.0
+    assert new_event_rec.get_property("latitude").value == 0.0
+    assert new_event_rec.get_property("location").value == "Origin"
+    assert new_event_rec.get_property(
+        "start_datetime").value == old_event_rec.get_property("start_datetime").value