diff --git a/CHANGELOG.md b/CHANGELOG.md index 65547e6b8017e53d711955f9f9fbee00dc739b86..fc33875e5f7e2219d963b6d5292ecd6f09d55b41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,26 +8,35 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ## ### Added ### -* 'transform' sections can be added to a CFood to apply functions to values stored in variables. + +* `transform` sections can be added to a CFood to apply functions to values stored in variables. * default transform functions: submatch, split and replace. * `*` can now be used as a wildcard in the identifiables parameter file to denote that any Record may reference the identified one. +* `crawl.TreatedRecordLookUp` class replacing the old (and slow) + `identified_cache` module. The new class now handles all records identified by + id, path, or identifiable simultaneously. See API docs for more info on how to + add to and get from the new lookup class. ### Changed ### -- If the `parents` key is used in a cfood at a lower level for a Record that + +* If the `parents` key is used in a cfood at a lower level for a Record that already has a Parent (because it was explicitly given or the default Parent), the old Parent(s) are now overwritten with the value belonging to the `parents` key. -- If a registered identifiable states, that a reference by a Record with parent +* If a registered identifiable states that a reference by a Record with parent RT1 is needed, then now also references from Records that have a child of RT1 as parent are accepted. -- More aggressive caching. +* More aggressive caching. ### Deprecated ### -- `IdentifiableAdapter.get_file` + +* `IdentifiableAdapter.get_file` ### Removed ### +* `identified_cache` module, which was replaced by the `crawl.TreatedRecordLookUp` class. 
+ ### Fixed ### * Empty Records can now be created (https://gitlab.com/caosdb/caosdb-crawler/-/issues/27) diff --git a/src/caoscrawler/crawl.py b/src/caoscrawler/crawl.py index 8309f782906dd8e2069ea32df43d3a9b5f88b013..fee5c42d354269a7d6429a32f78cff62c8e29f05 100644 --- a/src/caoscrawler/crawl.py +++ b/src/caoscrawler/crawl.py @@ -243,7 +243,7 @@ class TreatedRecordLookUp(): In the case that the Record is not yet in the remote server, there cannot be a Python object with an ID. Thus we might have one with a path and one with an identifiable. If that Record does not yet exist, it is necessary that both Python objects have at least either the path or - the identifiable in common. Currently, this has to be assured by the user. + the identifiable in common. """ def __init__(self): diff --git a/unittests/test_crawler.py b/unittests/test_crawler.py index 7c62b004b207da42a37bf26dce295810ec9e3075..dbce182af9c8dd4a0ac6921364d7c0fbea53dddf 100644 --- a/unittests/test_crawler.py +++ b/unittests/test_crawler.py @@ -978,8 +978,6 @@ def test_treated_record_lookup(): assert trlu._id_look_up[exist.id] is exist # can be accessed via get_existing assert trlu.get_existing(db.Record(id=1)) is exist - # assert trlu.is_existing(db.Record(id=1)) # TODO remove? - # assert not trlu.is_missing(db.Record(id=1)) # TODO remove? miss = db.Record() # exception when identifiable is missing @@ -993,8 +991,6 @@ def test_treated_record_lookup(): assert trlu._identifiable_look_up[ident.get_representation()] is miss # can be accessed via get_missing assert trlu.get_missing(db.Record(), Identifiable(name='a')) is miss - # assert not trlu.is_existing(db.Record()) # TODO remove? - # assert trlu.is_missing(db.Record()) # TODO remove? fi = db.File(path='a', id=2) trlu.add(fi)