Skip to content

Commit

Permalink
Implement query validation endpoint (#2645)
Browse files Browse the repository at this point in the history
* Implement query validation endpoint

Implement the query/{logicName}/validate endpoint. This feature supports
the ability to configure validation rules that will validate LUCENE and
JEXL queries against a number of criteria and provide meaningful
feedback to customers.

Closes #2585

---------

Co-authored-by: Seth Smucker <98466968+SethSmucker@users.noreply.github.com>
Co-authored-by: Ivan Bella <347158+ivakegg@users.noreply.github.com>
  • Loading branch information
3 people authored Jan 16, 2025
1 parent 4ade38a commit 9776389
Show file tree
Hide file tree
Showing 92 changed files with 11,431 additions and 396 deletions.
26 changes: 26 additions & 0 deletions core/query/src/main/java/datawave/core/query/logic/QueryLogic.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.commons.collections4.Transformer;
import org.apache.commons.collections4.iterators.TransformIterator;

import datawave.audit.SelectorExtractor;
Expand All @@ -24,6 +25,7 @@
import datawave.webservice.query.exception.QueryException;
import datawave.webservice.query.result.event.ResponseObjectFactory;
import datawave.webservice.result.BaseResponse;
import datawave.webservice.result.QueryValidationResponse;

public interface QueryLogic<T> extends Iterable<T>, Cloneable, ParameterValidator {

Expand Down Expand Up @@ -481,4 +483,28 @@ default void preInitialize(Query settings, Set<Authorizations> userAuthorization

void setServerUser(ProxiedUserDetails serverUser);

/**
 * Runs this query logic's validation rules against the supplied query.
 * <p>
 * The default implementation does not validate anything: it always throws {@link UnsupportedOperationException}. Query logics that support validation
 * override this method. The result is returned as a plain {@code Object}; use {@link QueryLogic#getQueryValidationResponseTransformer()} to convert it to a
 * {@link QueryValidationResponse}.
 *
 * @param client
 *            the Accumulo client to use for this query
 * @param query
 *            the query settings (query, begin date, end date, etc.)
 * @param auths
 *            the authorizations that have been calculated for this query based on the caller and server
 * @return the outcome of validating the query, in a form understood by this logic's validation response transformer
 * @throws UnsupportedOperationException
 *             always, in this default implementation
 * @throws Exception
 *             declared for overriding implementations; the default implementation never throws a checked exception
 */
default Object validateQuery(AccumuloClient client, Query query, Set<Authorizations> auths) throws Exception {
    final String reason = "Query validation not implemented";
    throw new UnsupportedOperationException(reason);
}

/**
 * Supplies the transformer that turns the object returned by {@link QueryLogic#validateQuery(AccumuloClient, Query, Set)} into a
 * {@link QueryValidationResponse}.
 * <p>
 * The default implementation provides no transformer and always throws {@link UnsupportedOperationException}.
 *
 * @return the transformer
 * @throws UnsupportedOperationException
 *             always, in this default implementation
 */
default Transformer<Object,QueryValidationResponse> getQueryValidationResponseTransformer() {
    final String reason = "Query validation response transformer not implemented";
    throw new UnsupportedOperationException(reason);
}

}
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
<version.datawave.accumulo-utils>4.0.0</version.datawave.accumulo-utils>
<version.datawave.audit-api>4.0.0</version.datawave.audit-api>
<version.datawave.authorization-api>4.0.0</version.datawave.authorization-api>
<version.datawave.base-rest-responses>4.0.0</version.datawave.base-rest-responses>
<version.datawave.base-rest-responses>4.0.1</version.datawave.base-rest-responses>
<version.datawave.common-utils>3.0.0</version.datawave.common-utils>
<version.datawave.dictionary-api>4.0.1</version.datawave.dictionary-api>
<version.datawave.mapreduce-query-api>1.0.0</version.datawave.mapreduce-query-api>
Expand Down Expand Up @@ -1584,7 +1584,7 @@
<dependencies>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
<artifactId>junit-jupiter</artifactId>
<version>${version.junit.bom}</version>
</dependency>
<dependency>
Expand Down Expand Up @@ -1694,7 +1694,7 @@
<dependencies>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
<artifactId>junit-jupiter</artifactId>
<version>${version.junit.bom}</version>
</dependency>
<dependency>
Expand Down
5 changes: 5 additions & 0 deletions warehouse/query-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,11 @@
<artifactId>junit-jupiter-api</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-params</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
Expand Down
31 changes: 31 additions & 0 deletions warehouse/query-core/src/main/java/datawave/query/Constants.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,30 @@ public class Constants {

public static final String PIPE = "|";

public static final String ASTERISK = "*";

public static final String QUOTE = "\"";

/**
 * Escape-sequence text for the opening smart quote: a backslash followed by {@code u0093} (six characters). The backslash is doubled in the literal, so
 * this constant holds the escape text and not the quote character itself. 0x93 is decimal 147, which the original comment calls "ASCII-147".
 * NOTE(review): 147 is outside 7-bit ASCII; presumably this refers to the Windows-1252 opening curly quote - confirm.
 */
public static final String UTF_16_SMART_QUOTE_LEFT = "\\u0093";

/**
 * Escape-sequence text for the closing smart quote: a backslash followed by {@code u0094} (six characters), not the quote character itself. 0x94 is decimal
 * 148, which the original comment calls "ASCII-148". NOTE(review): presumably the Windows-1252 closing curly quote - confirm.
 */
public static final String UTF_16_SMART_QUOTE_RIGHT = "\\u0094";

/**
 * Escape-sequence text for the opening (left) double quotation mark: a backslash followed by {@code u201c} (six characters), not the quote character
 * itself.
 */
public static final String UTF_16_DOUBLE_QUOTE_LEFT = "\\u201c";

/**
 * Escape-sequence text for the closing (right) double quotation mark: a backslash followed by {@code u201d} (six characters), not the quote character
 * itself.
 */
public static final String UTF_16_DOUBLE_QUOTE_RIGHT = "\\u201d";

public static final Text TEXT_NULL = new Text(NULL);

public static final Text FI_PREFIX = new Text("fi");
Expand Down Expand Up @@ -95,4 +119,11 @@ public class Constants {
public static final String END_DATE = "end.date";

public static final String COLUMN_VISIBILITY = "columnVisibility";

// Boxed character constants for the backslash and the asterisk.
public static final Character BACKSLASH_CHAR = '\\';
public static final Character ASTERISK_CHAR = '*';

// NOTE(review): presumably the names of the supported query syntaxes - confirm against callers.
public static final String JEXL = "JEXL";
public static final String LUCENE = "LUCENE";
public static final String LUCENE_UUID = "LUCENE-UUID";
}
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,8 @@ public Set<String> fieldsForNormalization(MetadataHelper helper, Set<String> dat
public Set<String> fields(MetadataHelper helper, Set<String> datatypeFilter) {
FunctionJexlNodeVisitor functionMetadata = new FunctionJexlNodeVisitor();
node.jjtAccept(functionMetadata, null);
Set<String> fields = Sets.newHashSet();
// Maintain insertion order.
Set<String> fields = Sets.newLinkedHashSet();

List<JexlNode> arguments = functionMetadata.args();
if (MATCHCOUNTOF.equals(functionMetadata.name())) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor;
import datawave.query.util.DateIndexHelper;
import datawave.query.util.MetadataHelper;
import datawave.util.StringUtils;

public class QueryFunctionsDescriptor implements JexlFunctionArgumentDescriptorFactory {

Expand Down Expand Up @@ -207,6 +208,15 @@ public Set<String> fields(MetadataHelper helper, Set<String> datatypeFilter) {
case QueryFunctions.NO_EXPANSION:
case QueryFunctions.LENIENT_FIELDS_FUNCTION:
case QueryFunctions.STRICT_FIELDS_FUNCTION:
case QueryFunctions.EXCERPT_FIELDS_FUNCTION:
case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_YEAR_FUNCTION:
case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MONTH_FUNCTION:
case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_DAY_FUNCTION:
case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_HOUR_FUNCTION:
case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION:
case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MINUTE_FUNCTION:
case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_SECOND_FUNCTION:
case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MILLISECOND_FUNCTION:
// In practice each of these functions should be parsed from the query
// almost immediately. This implementation is added for consistency
for (JexlNode arg : args) {
Expand Down Expand Up @@ -236,6 +246,12 @@ public Set<String> fields(MetadataHelper helper, Set<String> datatypeFilter) {
}
}
break;
case QueryFunctions.RENAME_FUNCTION:
    // Each argument is split on Constants.EQUALS and only the first part is recorded as a field.
    // NOTE(review): presumably arguments look like <field>=<newName> - confirm against the rename function's contract.
    for (JexlNode arg : args) {
        String value = JexlNodes.getIdentifierOrLiteralAsString(arg);
        String[] parts = StringUtils.split(value, Constants.EQUALS);
        fields.add(parts[0]);
    }
    // Stop here: without this break, control falls through into the MATCH_REGEX/BETWEEN/LENGTH handling below and the rename arguments would be
    // processed a second time as if they belonged to those functions.
    break;
case QueryFunctions.MATCH_REGEX:
case BETWEEN:
case LENGTH:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
package datawave.query.jexl.visitors;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Objects;
import java.util.Set;
import java.util.StringJoiner;

import org.apache.commons.jexl3.parser.ASTFunctionNode;
import org.apache.commons.jexl3.parser.JexlNode;
import org.apache.commons.lang3.tuple.Pair;

import com.google.common.collect.LinkedHashMultimap;

import datawave.query.jexl.functions.FunctionJexlNodeVisitor;
import datawave.query.jexl.functions.JexlFunctionArgumentDescriptorFactory;
import datawave.query.jexl.functions.arguments.JexlArgumentDescriptor;
import datawave.query.util.MetadataHelper;

/**
 * A visitor that collects, for each function found in a query tree, the fields reported by that function's argument descriptor. The functions to inspect can
 * be restricted with a set of {@code <namespace, function>} pairs; when no filter is given, every function is inspected.
 */
public class FetchFunctionFieldsVisitor extends ShortCircuitBaseVisitor {

    // The <namespace, function> filters. An empty set means every function is accepted.
    private final Set<Pair<String,String>> functions;
    private final MetadataHelper metadataHelper;
    // Linked multimap so that functions, and the fields within each function, keep the order in which they were added.
    private final LinkedHashMultimap<Pair<String,String>,String> fields = LinkedHashMultimap.create();

    /**
     * Fetch the fields seen in the specified functions.
     *
     * @param query
     *            the query tree; may be null, in which case an empty set is returned
     * @param functions
     *            the set of {@code <namespace, function>} pairs to filter on; null or empty means no filtering
     * @param metadataHelper
     *            the metadata helper handed to each function's argument descriptor
     * @return one {@link FunctionFields} per function for which at least one field was found
     */
    public static Set<FunctionFields> fetchFields(JexlNode query, Set<Pair<String,String>> functions, MetadataHelper metadataHelper) {
        if (query == null) {
            return Collections.emptySet();
        }
        FetchFunctionFieldsVisitor visitor = new FetchFunctionFieldsVisitor(functions, metadataHelper);
        query.jjtAccept(visitor, functions);
        return visitor.getFunctionFields();
    }

    private FetchFunctionFieldsVisitor(Set<Pair<String,String>> functions, MetadataHelper metadataHelper) {
        Set<Pair<String,String>> filters = Collections.emptySet();
        if (functions != null && !functions.isEmpty()) {
            // Copy into fresh pairs so the visitor holds neither the caller's set nor the caller's pair instances.
            filters = new HashSet<>();
            for (Pair<String,String> pair : functions) {
                filters.add(Pair.of(pair.getLeft(), pair.getRight()));
            }
        }
        this.functions = filters;
        this.metadataHelper = metadataHelper;
    }

    /**
     * Records the fields of the given function node if the function passes the filter. This method does not descend into the node's children itself.
     *
     * @param node
     *            the function node
     * @param data
     *            unused
     * @return always null
     */
    @Override
    public Object visit(ASTFunctionNode node, Object data) {
        FunctionJexlNodeVisitor functionVisitor = new FunctionJexlNodeVisitor();
        node.jjtAccept(functionVisitor, null);

        Pair<String,String> key = Pair.of(functionVisitor.namespace(), functionVisitor.name());
        // When filters are present, ignore any function that is not one of them.
        if (!functions.isEmpty() && !functions.contains(key)) {
            return null;
        }

        JexlArgumentDescriptor descriptor = JexlFunctionArgumentDescriptorFactory.F.getArgumentDescriptor(node);
        Set<String> found = descriptor.fields(metadataHelper, null);
        // Only functions that actually reference fields get an entry.
        if (!found.isEmpty()) {
            this.fields.putAll(key, found);
        }
        return null;
    }

    // Converts the collected multimap into a set of FunctionFields, keeping the order in which functions were first recorded.
    private Set<FunctionFields> getFunctionFields() {
        Set<FunctionFields> result = new LinkedHashSet<>();
        fields.keySet().forEach(key -> result.add(new FunctionFields(key.getLeft(), key.getRight(), fields.get(key))));
        return result;
    }

    /**
     * An immutable holder for a function's namespace, its name, and the fields found for it.
     */
    public static class FunctionFields {
        private final String namespace;
        private final String function;
        private final Set<String> fields;

        /**
         * Creates a {@link FunctionFields} from the given namespace, function name and fields.
         *
         * @param namespace
         *            the function namespace
         * @param function
         *            the function name
         * @param fields
         *            the fields, in the order they should be retained
         * @return the new instance
         */
        public static FunctionFields of(String namespace, String function, String... fields) {
            return new FunctionFields(namespace, function, Arrays.asList(fields));
        }

        private FunctionFields(String namespace, String function, Collection<String> fields) {
            this.namespace = namespace;
            this.function = function;
            if (fields.isEmpty()) {
                this.fields = Collections.emptySet();
            } else {
                // Copy into a linked set to keep the given order, then wrap so callers cannot modify it.
                this.fields = Collections.unmodifiableSet(new LinkedHashSet<>(fields));
            }
        }

        public String getNamespace() {
            return namespace;
        }

        public String getFunction() {
            return function;
        }

        public Set<String> getFields() {
            return fields;
        }

        @Override
        public boolean equals(Object object) {
            if (object == this) {
                return true;
            }
            if (object == null || getClass() != object.getClass()) {
                return false;
            }
            FunctionFields other = (FunctionFields) object;
            if (!Objects.equals(namespace, other.namespace)) {
                return false;
            }
            if (!Objects.equals(function, other.function)) {
                return false;
            }
            return Objects.equals(fields, other.fields);
        }

        @Override
        public int hashCode() {
            return Objects.hash(namespace, function, fields);
        }

        @Override
        public String toString() {
            return FunctionFields.class.getSimpleName() + "[" + "namespace='" + namespace + "'" + ", " + "function='" + function + "'" + ", " + "fields="
                            + fields + "]";
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;
Expand Down Expand Up @@ -66,7 +67,8 @@ public FieldMissingFromSchemaVisitor(MetadataHelper helper, Set<String> datatype
@SuppressWarnings("unchecked")
public static Set<String> getNonExistentFields(MetadataHelper helper, ASTJexlScript script, Set<String> datatypes, Set<String> specialFields) {
    FieldMissingFromSchemaVisitor visitor = new FieldMissingFromSchemaVisitor(helper, datatypes, specialFields);
    // A linked set is handed to the visitor as its data object so that insertion order is maintained.
    Set<String> nonExistentFields = new LinkedHashSet<>();
    Object visited = script.jjtAccept(visitor, nonExistentFields);
    return (Set<String>) visited;
}

/**
Expand Down
Loading

0 comments on commit 9776389

Please sign in to comment.