def clean_json(data: Union[dict, list], no_remove_id: bool = False, no_id_name: bool = False,
               no_remove_none: bool = False,
               ) -> Union[dict, list]:
    """Clean up a json object in place.

    The following steps are applied to the child elements of ``data`` and then,
    recursively, to every nested dict or list (each step can be switched off by
    giving the corresponding ``no_<step>`` option):

    - Turn id-name dicts into simple name strings:
      ``{"id": 123, "name": "foo"} -> "foo"``.  This only happens if there are
      no other keys except for id and name.
    - Remove "id" keys from dicts.
    - Remove none-valued entries from dicts.

    The top-level container itself is never replaced and its own "id" key is
    kept; only containers reached as children are affected by the first two
    steps.

    Self-referential structures are supported: a container that is already
    being processed further up the recursion path is not entered again, so
    cyclic input terminates instead of raising ``RecursionError``.

    Parameters
    ----------
    data : Union[dict, list]
        The data to be cleaned up.

    no_remove_id: bool = False
        Do not remove ``id`` keys.

    no_id_name: bool = False
        Do not turn id-name dicts into simple name strings.

    no_remove_none: bool = False
        Do not remove ``None`` entries from dicts.

    Returns
    -------
    out : Union[dict, list]
        The input object, but cleaned.  This function works in place.

    Raises
    ------
    ValueError
        If ``data`` is neither a dict nor a list.
    """
    if not isinstance(data, (dict, list)):
        raise ValueError(f"Data must be a dict or list, is: {type(data)}")

    # Identities of the containers on the current recursion path.  Only true
    # cycles are cut; a container that is merely shared between two branches is
    # still processed once per branch, exactly as without cycle detection.
    in_progress = set()

    def _is_id_name(element):
        """Return True if ``element`` is an id-name dict."""
        return isinstance(element, dict) and set(element.keys()) == {"id", "name"}

    def _children(container):
        """Return a snapshot of the child elements of ``container``."""
        # A snapshot keeps iteration safe when a cycle makes a child identical
        # to a dict that is currently being iterated.
        return list(container.values()) if isinstance(container, dict) else list(container)

    def _clean(container):
        """Apply all enabled cleaning steps to ``container`` and its descendants."""
        in_progress.add(id(container))
        try:
            is_dict = isinstance(container, dict)

            # Id-name 2-dict replacement (existing keys/indices only, no resize).
            if not no_id_name:
                entries = container.items() if is_dict else enumerate(container)
                for key, element in entries:
                    if _is_id_name(element):
                        container[key] = element["name"]

            # Remove "id" from child dicts.
            if not no_remove_id:
                for element in _children(container):
                    if isinstance(element, dict):
                        element.pop("id", None)

            # Remove None-valued entries; lists keep their None elements.
            if is_dict and not no_remove_none:
                for key in [key for key, value in container.items() if value is None]:
                    del container[key]

            # Recurse into nested containers, skipping those already on the path.
            for element in _children(container):
                if isinstance(element, (dict, list)) and id(element) not in in_progress:
                    _clean(element)
        finally:
            in_progress.discard(id(container))

    _clean(data)
    return data
def test_clean_json():
    """Check ``clean_json`` on empty, flat and nested input, with each option."""

    # An arbitrary non-JSON value; it must pass through the cleaner untouched.
    record = db.Record()

    # Every case is (input data, keyword options, expected result).
    cases = [
        # Empty content is returned unchanged.
        ([], {}, []),
        ({}, {}, {}),
        ([{}], {}, [{}]),
        ({1: []}, {}, {1: []}),
        # A top-level id-name dict is left as it is.
        ({"id": 123, "name": "foo"}, {}, {"id": 123, "name": "foo"}),

        # Simple removal, and the switch for each step.
        ([{"id": 123, "name": "foo"}], {}, ["foo"]),
        # Strictly speaking, this is no json any more.
        ([{"id": 123, "name": record}], {"no_id_name": True}, [{"name": record}]),
        ([{"id": 123, "other_key": "value"}], {}, [{"other_key": "value"}]),
        ([{"id": 123, "other_key": "value"}], {"no_remove_id": True},
         [{"id": 123, "other_key": "value"}]),
        ([{"key": None, "other_key": "value"}], {}, [{"other_key": "value"}]),
        ([{"key": None, "other_key": "value"}], {"no_remove_none": True},
         [{"key": None, "other_key": "value"}]),

        # Additional keys prevent the id-name replacement.
        ([{"id": 123, "name": "foo", "not-a-name": "something"}], {},
         [{"name": "foo", "not-a-name": "something"}]),

        # Nested content and several elements.
        ([{"id": 123, "nested": {"id": 456, "name": "bar"}}], {}, [{"nested": "bar"}]),
        ([{"id": 123, "name": "foo"}, {"id": 456, "name": "bar"}], {}, ["foo", "bar"]),
        ([{"id": 123, "name": "foo"}, {"id": 456, "name": "bar"}], {"no_id_name": True},
         [{"name": "foo"}, {"name": "bar"}]),
        ([{"id": 123, "name": "foo"}, {"id": 456, "name": "bar"},
          {"id": 789, "name": "bar", "not-a-name": "something"}],
         {"no_remove_id": True},
         ["foo", "bar", {"id": 789, "name": "bar", "not-a-name": "something"}]),
    ]

    for data, options, expected in cases:
        assert clean_json(data, **options) == expected

    # Self-referential (cyclic) input is deliberately not exercised here.