Skip to content
Snippets Groups Projects
Commit 1919ee6b authored by florian's avatar florian
Browse files

Merge branch 'dev' into f-prefix

parents 01329a91 169cf927
No related branches found
No related tags found
2 merge requests!105REL: v0.4.0,!101ENH: add the 'add_prefix' argument
Pipeline #34105 failed
......@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- DateElementConverter: allows interpreting text as a date object
- the restricted_path argument allows crawling only a subtree
- logging that provides a summary of what is inserted and updated
- You can now access the file system path of a structure element (if it has one) using the variable
name ``<converter name>.path``
- ``add_prefix`` and ``remove_prefix`` arguments for the command line interface
......
......@@ -25,6 +25,7 @@
an integration test module that runs a test against a (close to) real world example
"""
from caosdb.utils.register_tests import clear_database, set_test_key
import logging
import json
import os
......@@ -98,6 +99,7 @@ def create_identifiable_adapter():
def test_dataset(clear_database, usemodel, addfiles):
caplog.set_level(logging.DEBUG, logger="caoscrawler")
identifiable_path = os.path.join(DATADIR, "identifiables.yml")
crawler_definition_path = os.path.join(DATADIR, "dataset_cfoods.yml")
crawler_main(
......@@ -129,6 +131,11 @@ def test_dataset(clear_database, usemodel, addfiles):
"start_datetime='2022-02-10T16:36:48+01:00'") == 1
assert db.execute_query(f"FIND Event WITH latitude=53", unique=True)
# test logging
assert "Executed inserts" in caplog.text
assert "Going to insert" in caplog.text
assert "Executed updates" in caplog.text
def test_event_update(clear_database, usemodel, addfiles):
......
......@@ -49,6 +49,7 @@ from typing import Any, Optional, Type, Union
import caosdb as db
from caosadvancedtools.utils import create_entity_link
from caosadvancedtools.cache import UpdateCache, Cache
from caosadvancedtools.crawler import Crawler as OldCrawler
from caosdb.apiutils import (compare_entities, EntityMergeConflictError,
......@@ -1017,20 +1018,25 @@ class Crawler(object):
referencing_entities)
for record in to_be_updated]
# Merge with existing data to prevent unwanted overwrites
to_be_updated = self._merge_properties_from_remote(to_be_updated,
identified_records)
to_be_updated = self._merge_properties_from_remote(to_be_updated, identified_records)
# remove unnecessary updates from list by comparing the target records
# to the existing ones
to_be_updated = self.remove_unnecessary_updates(
to_be_updated, identified_records)
to_be_updated = self.remove_unnecessary_updates(to_be_updated, identified_records)
# Report the size of each list separately: the update count has to come from
# to_be_updated, not from to_be_inserted.
logger.info(f"Going to insert {len(to_be_inserted)} Entities and update "
            f"{len(to_be_updated)} Entities.")
if commit_changes:
self.execute_parent_updates_in_list(to_be_updated, securityMode=self.securityMode,
run_id=self.run_id, unique_names=unique_names)
logger.info(f"Added parent RecordTypes where necessary.")
self.execute_inserts_in_list(
to_be_inserted, self.securityMode, self.run_id, unique_names=unique_names)
logger.info(f"Executed inserts:\n"
+ self.create_entity_summary(to_be_inserted))
self.execute_updates_in_list(
to_be_updated, self.securityMode, self.run_id, unique_names=unique_names)
logger.info(f"Executed updates:\n"
+ self.create_entity_summary(to_be_updated))
update_cache = UpdateCache()
pending_inserts = update_cache.get_inserts(self.run_id)
......@@ -1045,6 +1051,25 @@ class Crawler(object):
return (to_be_inserted, to_be_updated)
@staticmethod
def create_entity_summary(entities: list[db.Entity]):
    """Create a summary string representation of a list of entities.

    The entities are grouped by the names of their parents. For every parent
    name the summary contains a header line ``<parent name>:`` followed by one
    line holding the links of all entities with that parent (as produced by
    ``create_entity_link``), separated by ``", "``. Groups appear in the order
    in which their parent name is first encountered.

    An entity with several parents is listed under each of them; an entity
    without parents does not appear in the summary.

    Parameters
    ----------
    entities : list[db.Entity]
        The entities to be summarized.

    Returns
    -------
    str
        The summary text; an empty string if no entity has a parent.
    """
    # Group the entities by parent name; dict insertion order keeps the
    # groups in order of first appearance.
    by_parent = {}
    for entity in entities:
        for parent in entity.parents:
            by_parent.setdefault(parent.name, []).append(entity)

    # One header line and one comma separated line of links per parent.
    return "".join(
        f"{parent_name}:\n"
        + ", ".join(create_entity_link(member) for member in members)
        + "\n"
        for parent_name, members in by_parent.items()
    )
@staticmethod
def inform_about_pending_changes(pending_changes, run_id, path, inserts=False):
# Sending an Email with a link to a form to authorize updates is
......
......@@ -982,3 +982,20 @@ def test_deprecated_prefix_option():
remove_prefix="to/be/removed")
assert "(deprecated) `prefix` and the `remove_prefix`" in str(ve.value)
def test_create_entity_summary():
    """Check the text produced by ``Crawler.create_entity_summary``."""
    # Without any entities the summary contains nothing but whitespace.
    assert "" == Crawler.create_entity_summary([]).strip()

    # Two records per parent; only the records with parent "B" carry a name.
    records = [
        db.Record(id=1).add_parent("A"),
        db.Record(id=4, name='a').add_parent("B"),
        db.Record(id=5).add_parent("A"),
        db.Record(id=6, name='b').add_parent("B"),
    ]
    summary = Crawler.create_entity_summary(records).strip()

    # Record names and parent headers have to show up in the summary.
    for fragment in ('a', 'b', 'A:', 'B:'):
        assert fragment in summary
    # Records sharing a parent are rendered as links joined by ", ".
    assert "<a href='/Entity/4'>a</a>, <a href='/Entity/6'>b</a>" in summary
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment