diff --git a/CHANGELOG.md b/CHANGELOG.md index 65547e6b8017e53d711955f9f9fbee00dc739b86..7228e5fc24126d3d07f510e144d8ad03e137d024 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,26 +8,35 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ## ### Added ### -* 'transform' sections can be added to a CFood to apply functions to values stored in variables. + +* `transform` sections can be added to a CFood to apply functions to values stored in variables. * default transform functions: submatch, split and replace. * `*` can now be used as a wildcard in the identifiables parameter file to denote that any Record may reference the identified one. +* `crawl.TreatedRecordLookUp` class replacing the old (and slow) + `identified_cache` module. The new class now handles all records identified by + id, path, or identifiable simultaneously. See API docs for more info on how to + add to and get from the new lookup class. ### Changed ### -- If the `parents` key is used in a cfood at a lower level for a Record that + +* If the `parents` key is used in a cfood at a lower level for a Record that already has a Parent (because it was explicitly given or the default Parent), the old Parent(s) are now overwritten with the value belonging to the `parents` key. -- If a registered identifiable states, that a reference by a Record with parent +* If a registered identifiable states that a reference by a Record with parent RT1 is needed, then now also references from Records that have a child of RT1 as parent are accepted. -- More aggressive caching. +* More aggressive caching. ### Deprecated ### -- `IdentifiableAdapter.get_file` + +* `IdentifiableAdapter.get_file` ### Removed ### +* `identified_cache` module which was replaced by the `crawl.TreatedRecordLookUp` class. 
+ ### Fixed ### * Empty Records can now be created (https://gitlab.com/caosdb/caosdb-crawler/-/issues/27) @@ -40,6 +49,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 handles cases correctly in which entities retrieved from the server have to be merged with local entities that both reference another, already existing entity +* A corner case in `split_into_inserts_and_updates` whereby two records created + in different places in the cfood definition would not be merged if both were + identified by the same LinkAhead id ### Security ### diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 13d9bba0306d0cfd221a8b43db89d3a8a7b08ecb..d30472c9aa9745fb985358f86015f01286a41680 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -243,7 +243,7 @@ class TreatedRecordLookUp(): In the case that the Record is not yet in the remote server, there cannot be a Python object with an ID. Thus we might have one with a path and one with an identifiable. If that Record does not yet exist, it is necessary that both Python objects have at least either the path or - the identifiable in common. Currently, this has to be assured by the user. + the identifiable in common. """ def __init__(self): diff --git a/unittests/test_crawler.py b/unittests/test_crawler.py index 23b374849543a6e132c605cd83ec0b51e4c2cbe9..5eecb10630bb478ff9070ad97eb5acca9a963fab 100644 --- a/unittests/test_crawler.py +++ b/unittests/test_crawler.py @@ -988,8 +988,6 @@ def test_treated_record_lookup(): assert trlu._id_look_up[exist.id] is exist # can be accessed via get_existing assert trlu.get_existing(db.Record(id=1)) is exist - # assert trlu.is_existing(db.Record(id=1)) # TODO remove? - # assert not trlu.is_missing(db.Record(id=1)) # TODO remove? 
miss = db.Record() # exception when identifiable is missing @@ -1003,8 +1001,6 @@ def test_treated_record_lookup(): assert trlu._identifiable_look_up[ident.get_representation()] is miss # can be accessed via get_missing assert trlu.get_missing(db.Record(), Identifiable(name='a')) is miss - # assert not trlu.is_existing(db.Record()) # TODO remove? - # assert trlu.is_missing(db.Record()) # TODO remove? fi = db.File(path='a', id=2) trlu.add(fi)