diff --git a/CHANGELOG.md b/CHANGELOG.md index 82b2454c03a5ef05326a9682abfbe9c578b07cf3..858790276e69616205d22401781726b739936a52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### +* New `page_length` parameter for `caosdb.execute_query` and + `caosdb.Query.execute`. See docstrings for more details. * `Entity.remove_value_from_property` function that removes a given value from a property and optionally removes the property if it is empty afterwards. diff --git a/src/caosdb/common/models.py b/src/caosdb/common/models.py index be9c9c6a9d4ba79b52dee88ddda15440b05b19f4..547a83f06d1af90223b621bc341b39c52f156758 100644 --- a/src/caosdb/common/models.py +++ b/src/caosdb/common/models.py @@ -54,13 +54,14 @@ from ..configuration import get_config from ..connection.connection import get_connection from ..connection.encode import MultipartParam, multipart_encode from ..exceptions import (AmbiguousEntityError, AuthorizationError, - ConsistencyError, EmptyUniqueQueryError, - EntityDoesNotExistError, EntityError, - EntityHasNoDatatypeError, HTTPURITooLongError, - LinkAheadConnectionError, LinkAheadException, - MismatchingEntitiesError, QueryNotUniqueError, - TransactionError, UniqueNamesError, - UnqualifiedParentsError, UnqualifiedPropertiesError) + CaosDBConnectionError, CaosDBException, + ConsistencyError, EmptyUniqueQueryError, + EntityDoesNotExistError, EntityError, + EntityHasNoDatatypeError, HTTPURITooLongError, + MismatchingEntitiesError, QueryNotUniqueError, + TransactionError, UniqueNamesError, + UnqualifiedParentsError, PagingConsistencyError, + UnqualifiedPropertiesError) from .datatype import BOOLEAN, DATETIME, DOUBLE, INTEGER, TEXT, is_reference, is_list_datatype from .state import State from .timezone import TimeZone @@ -4347,7 +4348,34 @@ class Query(): else: self.q = q - def execute(self, unique=False, raise_exception_on_error=True, cache=True): + def 
_query_request(self, query_dict): + """Used internally to execute the query request...""" + _log_request("GET Entity?" + str(query_dict), None) + connection = get_connection() + http_response = connection.retrieve( + entity_uri_segments=["Entity"], + query_dict=query_dict) + cresp = Container._response_to_entities(http_response) + return cresp + + def _paging_generator(self, first_page, query_dict, page_length): + """Used internally to create a generator of pages instead of a + container which contains all the results.""" + if len(first_page) == 0: + return # empty page + yield first_page + index = page_length + while self.results > index: + query_dict["P"] = f"{index}L{page_length}" + next_page = self._query_request(query_dict) + etag = next_page.query.etag + if etag is not None and etag != self.etag: + raise PagingConsistencyError("The database state changed while retrieving the pages") + yield next_page + index += page_length + + def execute(self, unique=False, raise_exception_on_error=True, cache=True, + page_length=None): """Execute a query (via a server-requests) and return the results. Parameters @@ -4360,8 +4388,24 @@ class Query(): Whether an exception should be raises when there are errors in the resulting entities. Defaults to True. cache : bool - Whether to use the query cache (equivalent to adding a "cache" - flag) to the Query object. Defaults to True. + Whether to use the server-side query cache (equivalent to adding a + "cache" flag) to the Query object. Defaults to True. + page_length : int + Whether to use paging. If page_length > 0 this method returns a + generator (to be used in a for-loop or with list-comprehension). + The generator yields containers with up to page_length entities. + Otherwise, paging is disabled, as well as for count queries and + when unique is True. Defaults to None. + + Raises + ------ + PagingConsistencyError + If the database state changed between paged requests. 
+ + Yields + ------ + page : Container + Returns a container with the next `page_length` resulting entities. Returns ------- @@ -4369,8 +4413,6 @@ class Query(): Returns an integer when it was a `COUNT` query. Otherwise, returns a Container with the resulting entities. """ - connection = get_connection() - flags = self.flags if cache is False: @@ -4378,18 +4420,21 @@ class Query(): query_dict = dict(flags) query_dict["query"] = str(self.q) - _log_request("GET Entity?" + str(query_dict), None) - http_response = connection.retrieve( - entity_uri_segments=["Entity"], - query_dict=query_dict) - cresp = Container._response_to_entities(http_response) + has_paging = False + is_count_query = self.q.split()[0].lower() == "count" if len(self.q.split()) > 0 else False + + if not unique and not is_count_query and page_length is not None and page_length > 0: + has_paging = True + query_dict["P"] = f"0L{page_length}" + + # retrieve first/only page + cresp = self._query_request(query_dict) + self.results = cresp.query.results self.cached = cresp.query.cached self.etag = cresp.query.etag - if self.q.lower().startswith('count') and len(cresp) == 0: - # this was a count query - + if is_count_query: return self.results if raise_exception_on_error: @@ -4411,10 +4456,14 @@ class Query(): return r self.messages = cresp.messages - return cresp + if has_paging: + return self._paging_generator(cresp, query_dict, page_length) + else: + return cresp -def execute_query(q, unique=False, raise_exception_on_error=True, cache=True, flags=None): +def execute_query(q, unique=False, raise_exception_on_error=True, cache=True, + flags=None, page_length=None): """Execute a query (via a server-requests) and return the results. Parameters @@ -4429,10 +4478,26 @@ def execute_query(q, unique=False, raise_exception_on_error=True, cache=True, fl Whether an exception should be raised when there are errors in the resulting entities. Defaults to True. 
cache : bool - Whether to use the query cache (equivalent to adding a "cache" flag). - Defaults to True. + Whether to use the server-side query cache (equivalent to adding a + "cache" flag). Defaults to True. flags : dict of str Flags to be added to the request. + page_length : int + Whether to use paging. If page_length > 0 this method returns a + generator (to be used in a for-loop or with list-comprehension). + The generator yields containers with up to page_length entities. + Otherwise, paging is disabled, as well as for count queries and + when unique is True. Defaults to None. + + Raises + ------ + PagingConsistencyError + If the database state changed between paged requests. + + Yields + ------ + page : Container + Returns a container with the next `page_length` resulting entities. Returns ------- @@ -4447,7 +4512,7 @@ def execute_query(q, unique=False, raise_exception_on_error=True, cache=True, fl return query.execute(unique=unique, raise_exception_on_error=raise_exception_on_error, - cache=cache) + cache=cache, page_length=page_length) class DropOffBox(list): diff --git a/src/caosdb/exceptions.py b/src/caosdb/exceptions.py index cf60f7fb26d83fb68404d784dc8275c4095f4f10..a6abe09edbbece2a38bdc6c5e1296a2b3dd81bde 100644 --- a/src/caosdb/exceptions.py +++ b/src/caosdb/exceptions.py @@ -162,11 +162,15 @@ class MismatchingEntitiesError(LinkAheadException): # ######################### Bad query errors ########################### - class BadQueryError(LinkAheadException): """Base class for query errors that are not transaction errors.""" +class PagingConsistencyError(BadQueryError): + """The database state changed between two consecutive paged requests of the + same query.""" + + class QueryNotUniqueError(BadQueryError): """A unique query or retrieve found more than one entity."""