diff --git a/CHANGELOG.md b/CHANGELOG.md index ce244bd7a1dbc0215019fba9fe3fe637ca266b4b..4a067139f4416c1a2febfc9d85398572fc1b1511 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed * Minimal changes to the error messages for invalid user names and passwords. +* Java query cache is now user specific when necessary. Thus, also more complex + queries are cached than before. ### Deprecated @@ -46,8 +48,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 used. ### Security -* Query cache is now user specific. Thus, intermediate query results are not - reused for users (with potentially other permissions). ## [0.7.3] - 2022-05-03 (Daniel Hornung) diff --git a/src/main/java/org/caosdb/server/query/Query.java b/src/main/java/org/caosdb/server/query/Query.java index 6861f081038b031f2bf15c211cc23a0e93113e14..d462a0b62683b4c838856b649f7afc2f0762727a 100644 --- a/src/main/java/org/caosdb/server/query/Query.java +++ b/src/main/java/org/caosdb/server/query/Query.java @@ -35,10 +35,12 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.NoSuchElementException; import java.util.Set; import java.util.UUID; import org.antlr.v4.runtime.CharStreams; @@ -211,14 +213,67 @@ public class Query implements QueryInterface, ToElementable, TransactionInterfac } } - public static class IdVersionPair { - public IdVersionPair(final Integer id, final String version) { + /** + * A class for iterating over {@link ResultSet} + * + * <p>{@link ResultSet} only provides a `next` function which moves the cursor. The behavior is + * here mapped onto the functions of the Iterator interface. TODO Move this generic function? + * Check again if an implementation is available from elsewhere. + */ + public static class ResultSetIterator implements Iterator<IdVersionAclTriplet> { + public ResultSetIterator(final ResultSet resultset) { + this.resultSet = resultset; + } + + private ResultSet resultSet; + private boolean cursorHasMoved = false; + private boolean currentIsValid = true; + + public boolean hasNext() { + if (!this.cursorHasMoved) { + try { + this.currentIsValid = this.resultSet.next(); + } catch (SQLException e) { + throw new TransactionException(e); + } + this.cursorHasMoved = true; + } + return this.currentIsValid; + }; + + public IdVersionAclTriplet next() { + if (!this.cursorHasMoved) { + try { + this.currentIsValid = this.resultSet.next(); + } catch (SQLException e) { + throw new TransactionException(e); + } + } + this.cursorHasMoved = false; + if (!this.currentIsValid) { + throw new NoSuchElementException(); + } + try { + final Integer id = resultSet.getInt("id"); + final String acl_str = bytes2UTF8(resultSet.getBytes("ACL")); + return new IdVersionAclTriplet(id, "", acl_str); + } catch (SQLException e) { + throw new TransactionException(e); + } + } + } + + /** A data class for storing triplets of (Entity ID, version hash, ACL string) */ + public static class IdVersionAclTriplet { + public IdVersionAclTriplet(final Integer id, final String version, final String acl) { this.id = id; this.version = version; + this.acl = acl; } public Integer id; public String version; + public String acl; @Override public String toString() { @@ -230,9 +285,16 @@ public class Query implements QueryInterface, ToElementable, TransactionInterfac @Override public boolean equals(final Object obj) { - if (obj instanceof IdVersionPair) { - final IdVersionPair that = (IdVersionPair) obj; - return this.id == that.id && this.version == that.version; + if (obj instanceof IdVersionAclTriplet) { + final IdVersionAclTriplet that = (IdVersionAclTriplet) obj; + // checking ID and version hash should be sufficient + if (this.id == that.id && this.version == that.version) { + if (this.acl != that.acl) { + throw new RuntimeException("Implementation error! ACL should not differ"); + } + return true; + } + ; } return false; } @@ -249,7 +311,7 @@ public class Query implements QueryInterface, ToElementable, TransactionInterfac .equalsIgnoreCase("FALSE"); private final Logger logger = org.slf4j.LoggerFactory.getLogger(getClass()); - List<IdVersionPair> resultSet = null; + List<IdVersionAclTriplet> resultSet = null; private final String query; private Pattern entity = null; private Role role = null; @@ -265,6 +327,18 @@ public class Query implements QueryInterface, ToElementable, TransactionInterfac private final ArrayList<ToElementable> messages = new ArrayList<>(); private Access access; private boolean versioned = false; + /** + * This key-value cache stores lists of of (id, version hash, acl string) triplets. Those values + * are the result sets of queries. The keys are created such that they are different for different + * for different queries (@see {@link getCacheKey}). The key includes realm and username of a + * subject, if the query result must not be shared among users. If intermediate permission checks + * are done (e.g. in a subproperty query filter), the query result will be stored using a user + * specific key. The final permission check has not yet been applied to the result set that is + * stored in the cache. This allows (some) cache entries to be shared among users since the final + * check is applied after the retrieval of the result set from the cache (@see {@link + * filterEntitiesWithoutRetrievePermission}) The cache is invalidated whenever there is a write + * operation (@see {@link writeTODO}). + */ private static ICacheAccess<String, Serializable> cache = Cache.getCache("HIGH_LEVEL_QUERY_CACHE"); /** Cached=true means that the results of this query have actually been pulled from the cache. */ @@ -274,7 +348,7 @@ public class Query implements QueryInterface, ToElementable, TransactionInterfac * query evaluation contains complex permission checking which could only be cached on a per-user * basis (maybe in the future). */ - private boolean cachable = true; + private boolean filteredIntermediateResult = false; /** * Tags the query cache and is renewed each time the cache is being cleared, i.e. each time the @@ -344,9 +418,9 @@ public class Query implements QueryInterface, ToElementable, TransactionInterfac /** * @return The result set table name. - * @throws QueryException + * @throws TransactionException */ - private String sourceStrategy(final String sourceSet) throws QueryException { + private String sourceStrategy(final String sourceSet) throws TransactionException { try { this.sourceSet = sourceSet; initResultSetWithNameIDAndChildren(); @@ -372,7 +446,7 @@ public class Query implements QueryInterface, ToElementable, TransactionInterfac } /** - * Finds all QueryTemplates in the resultSet and applies them to the same resultSet. The ids of + * Finds all QueryTemplates in the resultSet and applies them to the same resultSet. The IDs of * the QueryTemplates themselves are then removed from the resultSet. If the current user doesn't * have the RETRIEVE:ENTITY permission for a particular QueryTemplate it will be ignored. * @@ -414,7 +488,7 @@ public class Query implements QueryInterface, ToElementable, TransactionInterfac union(query, resultSet, subResultSet); } } catch (final SQLException e) { - throw new TransactionException(e); + throw new QueryException(e); } } @@ -566,7 +640,7 @@ public class Query implements QueryInterface, ToElementable, TransactionInterfac * * <ol> * <li>FIND * -> FIND ENTITY (which basically prevents to copy the complete entity table just to - * read out the ids immediately). + * read out the IDs immediately). * </ol> */ public void optimize() { @@ -592,33 +666,44 @@ public class Query implements QueryInterface, ToElementable, TransactionInterfac } /** - * Generate a SQL statement which reads out the resulting ids (and version ids if `versioned` is - * true). + * Generate a SQL statement which reads out the resulting IDs and ACL strings (and version IDs if + * `versioned` is true). * - * <p>If the parameter `resultSetTableName` is "entities" actually the entity_version table is - * used to fetch all ids. + * <p>There are four variants: Where the parameter `resultSetTableName` is "entities" and + * otherwise and where `versioned` is true and otherwise. * * @param resultSetTableName name of the table with all the resulting entities * @param versioned whether the query was versioned * @return an SQL statement - * @throws QueryException */ private String generateSelectStatementForResultSet( final String resultSetTableName, final boolean versioned) { if (resultSetTableName.equals("entities")) { - return "SELECT entity_id AS id" - + (versioned ? ", version AS version" : "") - + " FROM entity_version" - + (versioned ? "" : " WHERE `_iversion` = 1"); + final String baseStatement = + "SELECT entities.id, entity_acl.acl FROM entities INNER JOIN entity_acl ON entity_acl.id=entities.acl"; + if (!versioned) { + return baseStatement + ";"; + } + // if versioned, the statement is surrounded with another SELECT and JOIN + return ("SELECT id, acl, version FROM (" + + baseStatement + + "( AS tmp JOIN entity_version ON entity_version.entity_id=tmp.id;"); + } else { + final String baseStatement = + (" SELECT tmp.id, entity_acl.acl, _iversion FROM " + + " (SELECT results.entity_id AS id, entities.acl AS acl_id, _iversion FROM `" + + resultSetTableName + + "` AS results JOIN entities ON results.entity_id=entities.id) AS tmp" + + " JOIN entity_acl ON entity_acl.id=tmp.acl_id"); + if (!versioned) { + return baseStatement + ";"; + } + // if versioned, the statement is surrounded with another SELECT and JOIN + return ("SELECT tmp2.id, acl, version FROM(" + + baseStatement + + ") as tmp2 " + + "join entity_version on (entity_version.entity_id=tmp2.id AND tmp2._iversion = entity_version._iversion);"); } - return "SELECT results.id AS id" - + (versioned ? ", ev.version AS version" : "") - + " FROM `" - + resultSetTableName - + "` AS results" - + (versioned - ? " JOIN entity_version AS ev ON (results.id = ev.entity_id AND results._iversion = ev._iversion)" - : ""); } /** @@ -629,17 +714,19 @@ public class Query implements QueryInterface, ToElementable, TransactionInterfac * @return list of results of this query. * @throws QueryException */ - private List<IdVersionPair> getResultSet(final String resultSetTableName, final boolean versioned) - throws QueryException { + private List<IdVersionAclTriplet> getResultSet( + final String resultSetTableName, final boolean versioned) throws QueryException { ResultSet finishResultSet = null; try { final String sql = generateSelectStatementForResultSet(resultSetTableName, versioned); final PreparedStatement finish = getConnection().prepareStatement(sql); finishResultSet = finish.executeQuery(); - final List<IdVersionPair> rs = new LinkedList<>(); + final List<IdVersionAclTriplet> rs = new LinkedList<>(); while (finishResultSet.next()) { final String version = versioned ? finishResultSet.getString("version") : null; - rs.add(new IdVersionPair(finishResultSet.getInt("id"), version)); + final String acl = finishResultSet.getString("acl"); + + rs.add(new IdVersionAclTriplet(finishResultSet.getInt("id"), version, acl)); } return rs; } catch (final SQLException e) { @@ -661,11 +748,54 @@ public class Query implements QueryInterface, ToElementable, TransactionInterfac * * @see {@link NoCache} * @return true if caching is encouraged. + * <p>TODO: Why is this a property of the Access class? Suggestion: Either remove useCache + * here and call directly the function of Access or move the property here. */ private boolean useCache() { return getAccess().useCache(); } + /** + * Try to set the `resultSet` member variable using the values stored in the high level query + * cache. + */ + private void getResultFromCache() { + // try key with username and realm + this.resultSet = getCached(getCacheKey(true)); + if (this.resultSet == null) { + // try key without username and realm + this.resultSet = getCached(getCacheKey(false)); + } + } + /** Store the content of `resultSet` member in the high level query cache. */ + private void storeResultInCache() { + // Decide whether user specific cache needs to be used or not + // Currently, this is solely determined via filteredIntermediateResult. + if (this.filteredIntermediateResult) { + setCache(getCacheKey(true), this.resultSet); + } else { + setCache(getCacheKey(false), this.resultSet); + } + } + /** Fill entities from `resultSet` into `container`. */ + private void fillContainerWithResult() { + if (this.container != null && this.type == Type.FIND) { + for (final IdVersionAclTriplet t : this.resultSet) { + // ignore internal entities + if (t.id < 100) { + continue; + } + final Entity e = new RetrieveEntity(t.id, t.version); + + // if query has select-clause: + if (this.selections != null && !this.selections.isEmpty()) { + e.addSelections(this.selections); + } + this.container.add(e); + } + } + } + /** * Execute the query. * @@ -673,46 +803,30 @@ public class Query implements QueryInterface, ToElementable, TransactionInterfac * * @param access * @return - * @throws ParsingException + * @throws ParsingException, QueryException */ - public Query execute(final Access access) throws ParsingException { + public Query execute(final Access access) throws ParsingException, QueryException { try { parse(); setAccess(access); if (useCache()) { - this.resultSet = getCached(getCacheKey()); + getResultFromCache(); } - - if (this.resultSet == null) { - executeNoCache(access); - if (this.cachable) { - setCache(getCacheKey(), this.resultSet); - } - this.logger.debug("Uncached query {}", this.query); - } else { + if (this.resultSet != null) { this.logger.debug("Using cached result for {}", this.query); this.cached = true; - } - // Fill resulting entities into container - if (this.container != null && this.type == Type.FIND) { - for (final IdVersionPair p : this.resultSet) { - - if (p.id > 99) { - final Entity e = new RetrieveEntity(p.id, p.version); - - // if query has select-clause: - if (this.selections != null && !this.selections.isEmpty()) { - e.addSelections(this.selections); - } - this.container.add(e); - } - } + } else { + executeQueryInBackend(access); + storeResultInCache(); + this.logger.debug("Uncached query {}", this.query); } + filterEntitiesWithoutRetrievePermission(this.resultSet); + fillContainerWithResult(); } catch (final SQLException e) { e.printStackTrace(); - throw new TransactionException(e); + throw new QueryException(e); } return this; } @@ -728,33 +842,32 @@ public class Query implements QueryInterface, ToElementable, TransactionInterfac * @param key * @param resultSet */ - private void setCache(final String key, final List<IdVersionPair> resultSet) { + // TODO is this a good name? + private void setCache(final String key, final List<IdVersionAclTriplet> resultSet) { synchronized (cache) { if (resultSet instanceof Serializable) { cache.put(key, (Serializable) resultSet); } else { + // TODO is this ever used? cache.put(key, new ArrayList<>(resultSet)); } } } /** - * Retrieve a result set of entity ids (and the version) from the cache. + * Retrieve a result set of entity IDs (and the version) from the cache. * * @param key * @return */ @SuppressWarnings("unchecked") - private List<IdVersionPair> getCached(final String key) { - return (List<IdVersionPair>) cache.get(key); + private List<IdVersionAclTriplet> getCached(final String key) { + return (List<IdVersionAclTriplet>) cache.get(key); } - protected void executeNoCache(final Access access) throws SQLException { + protected void executeQueryInBackend(final Access access) throws SQLException { try { - final String tabname = executeStrategy(this.versioned); - filterEntitiesWithoutRetrievePermission(tabname); - - this.resultSet = getResultSet(tabname, this.versioned); + this.resultSet = getResultSet(executeStrategy(this.versioned), this.versioned); } finally { cleanUp(); } @@ -794,14 +907,14 @@ public class Query implements QueryInterface, ToElementable, TransactionInterfac * Filter out all entities which may not be retrieved by this user due to a missing RETRIEVE * permission. This one is also designed for filtering of intermediate results. * - * @param resultSet + * @param tabname * @throws SQLException */ - public void filterEntitiesWithoutRetrievePermission(final String resultSet) throws SQLException { + public void filterEntitiesWithoutRetrievePermission(final String tabname) throws SQLException { if (!filterEntitiesWithoutRetrievePermisions) { return; } - cachable = false; + filteredIntermediateResult = true; /* * The following creates a table with the columns (entity ID, acl) from @@ -818,42 +931,76 @@ public class Query implements QueryInterface, ToElementable, TransactionInterfac ("SELECT entity_n_acl.id, entity_acl.acl from " + "(select entities.id, entities.acl from entities " + "inner join `" - + resultSet + + tabname + "` as rs on entities.id=rs.id) " + "as entity_n_acl " + "left join entity_acl on entity_n_acl.acl=entity_acl.id;"); - final ResultSet entities_with_acl = stmt.executeQuery(query); + final ResultSet entitiesRS = stmt.executeQuery(query); + final ResultSetIterator entitiesWithACL = new ResultSetIterator(entitiesRS); + final List<Integer> toBeDeleted = collectIdsWithoutPermission(entitiesWithACL); + entitiesRS.close(); + // TODO is there a better way than the following? + for (final Integer id : toBeDeleted) { + stmt.execute("DELETE FROM `" + tabname + "` WHERE id = " + id); + } + } + + /** + * Creates a new list that contains only the entities from the @resultSet for which the current + * subject has RETRIEVE permission. + * + * <p>Note, unlike the public version of this function @resultSet is not altered but the filtered + * list is returned. + * + * @param resultSet + * @return list without the entities with insufficient permissions + */ + private List<IdVersionAclTriplet> filterEntitiesWithoutRetrievePermission( + final List<IdVersionAclTriplet> resultSet) { + final List<Integer> toBeDeleted = collectIdsWithoutPermission(resultSet.iterator()); + final List<IdVersionAclTriplet> filtered = new ArrayList<>(); + for (final IdVersionAclTriplet triplet : resultSet) { + if (-1 == toBeDeleted.indexOf(triplet.id)) { + filtered.add(triplet); + } + ; + } + return filtered; + } + /** + * Creates a list with IDs of those entities that do not have sufficient RETRIEVE permission + * + * @param entityIterator Iterator over the result set consisting of (ID, version hash, acl string) + * triplets. + * @return compiled list + */ + private List<Integer> collectIdsWithoutPermission(Iterator<IdVersionAclTriplet> entityIterator) { final HashMap<String, Boolean> acl_cache = new HashMap<String, Boolean>(); final List<Integer> toBeDeleted = new LinkedList<Integer>(); - - while (entities_with_acl.next()) { + while (entityIterator.hasNext()) { final long t1 = System.currentTimeMillis(); - final Integer id = entities_with_acl.getInt("id"); - if (id <= 99) { + + final IdVersionAclTriplet triplet = entityIterator.next(); + if (triplet.id <= 99) { continue; } - final String acl_str = bytes2UTF8(entities_with_acl.getBytes("ACL")); - if (!acl_cache.containsKey(acl_str)) { + if (!acl_cache.containsKey(triplet.acl)) { acl_cache.put( - acl_str, - EntityACL.deserialize(acl_str) + triplet.acl, + EntityACL.deserialize(triplet.acl) .isPermitted(this.getUser(), EntityPermission.RETRIEVE_ENTITY)); } - if (!acl_cache.get(acl_str)) { - toBeDeleted.add(id); + if (!acl_cache.get(triplet.acl)) { + toBeDeleted.add(triplet.id); } final long t2 = System.currentTimeMillis(); this.addBenchmark("filterEntitiesWithoutRetrievePermission", t2 - t1); } - entities_with_acl.close(); - // TODO is there a better way than the following? - for (final Integer id : toBeDeleted) { - stmt.execute("DELETE FROM `" + resultSet + "` WHERE id = " + id); - } + return toBeDeleted; } @Override @@ -1005,9 +1152,9 @@ public class Query implements QueryInterface, ToElementable, TransactionInterfac * * @return A Cache key. */ - String getCacheKey() { + String getCacheKey(boolean addUser) { final StringBuilder sb = new StringBuilder(); - if (this.user != null) { + if (addUser && (this.user != null)) { String principal_desc = ((Principal) this.user.getPrincipal()).getUsername() + Principal.REALM_SEPARATOR diff --git a/src/test/java/org/caosdb/server/query/QueryTest.java b/src/test/java/org/caosdb/server/query/QueryTest.java index 8f3d4946c36d795e5e7bd9048a66f86cc0290a0b..c1e53294ca4731c84ed0106c26765ec300731bb2 100644 --- a/src/test/java/org/caosdb/server/query/QueryTest.java +++ b/src/test/java/org/caosdb/server/query/QueryTest.java @@ -42,7 +42,7 @@ public class QueryTest { String getCacheKey(String query) { Query q = new Query(query); q.parse(); - return q.getCacheKey(); + return q.getCacheKey(true); } @Test