From 6caea926e58d4e6d18332d0c9f3ab1cc09de11e8 Mon Sep 17 00:00:00 2001 From: Andrew Prudhomme Date: Mon, 6 Jan 2025 11:12:09 -0800 Subject: [PATCH] Add Term/TermInSet query support for DATE_TIME field (#797) * Add Term/TermInSet query support for DATE_TIME field * Unify doc value query building --- .../server/field/DateTimeFieldDef.java | 65 +++- .../field/properties/TermQueryable.java | 18 ++ .../server/query/QueryNodeMapper.java | 22 +- .../server/field/DateTimeFieldDefTest.java | 303 ++++++++++++++++++ .../field/registerFieldsDateTime.json | 19 ++ 5 files changed, 410 insertions(+), 17 deletions(-) diff --git a/src/main/java/com/yelp/nrtsearch/server/field/DateTimeFieldDef.java b/src/main/java/com/yelp/nrtsearch/server/field/DateTimeFieldDef.java index f8a5d0c3e..bd803154c 100644 --- a/src/main/java/com/yelp/nrtsearch/server/field/DateTimeFieldDef.java +++ b/src/main/java/com/yelp/nrtsearch/server/field/DateTimeFieldDef.java @@ -20,6 +20,7 @@ import com.yelp.nrtsearch.server.doc.LoadedDocValues; import com.yelp.nrtsearch.server.field.properties.RangeQueryable; import com.yelp.nrtsearch.server.field.properties.Sortable; +import com.yelp.nrtsearch.server.field.properties.TermQueryable; import com.yelp.nrtsearch.server.grpc.*; import com.yelp.nrtsearch.server.grpc.Field; import java.io.IOException; @@ -32,6 +33,7 @@ import java.time.format.DateTimeParseException; import java.time.format.ResolverStyle; import java.time.temporal.ChronoField; +import java.util.ArrayList; import java.util.List; import org.apache.lucene.document.*; import org.apache.lucene.facet.FacetField; @@ -46,7 +48,7 @@ /** Field class for 'DATE_TIME' field type. */ public class DateTimeFieldDef extends IndexableFieldDef - implements Sortable, RangeQueryable { + implements Sortable, RangeQueryable, TermQueryable { private static final String EPOCH_MILLIS = "epoch_millis"; private static final String STRICT_DATE_OPTIONAL_TIME = "strict_date_optional_time"; @@ -140,6 +142,67 @@ public Query getRangeQuery(RangeQuery rangeQuery) { return new IndexOrDocValuesQuery(pointQuery, dvQuery); } + @Override + public void checkTermQueriesSupported() { + if (!isSearchable() && !hasDocValues()) { + throw new IllegalStateException( + "Field \"" + + getName() + + "\" is not searchable or does not have doc values, which is required for TermQuery / TermInSetQuery"); + } + } + + @Override + public Query getTermQueryFromLongValue(long longValue) { + Query pointQuery = null; + Query dvQuery = null; + if (isSearchable()) { + pointQuery = LongPoint.newExactQuery(getName(), longValue); + if (!hasDocValues()) { + return pointQuery; + } + } + if (hasDocValues()) { + dvQuery = SortedNumericDocValuesField.newSlowExactQuery(getName(), longValue); + if (!isSearchable()) { + return dvQuery; + } + } + return new IndexOrDocValuesQuery(pointQuery, dvQuery); + } + + @Override + public Query getTermInSetQueryFromLongValues(List longValues) { + Query pointQuery = null; + Query dvQuery = null; + if (isSearchable()) { + pointQuery = LongPoint.newSetQuery(getName(), longValues); + if (!hasDocValues()) { + return pointQuery; + } + } + if (hasDocValues()) { + long[] longValuesArray = longValues.stream().mapToLong(l -> l).toArray(); + dvQuery = SortedNumericDocValuesField.newSlowSetQuery(getName(), longValuesArray); + if (!isSearchable()) { + return dvQuery; + } + } + return new IndexOrDocValuesQuery(pointQuery, dvQuery); + } + + @Override + public Query getTermQueryFromTextValue(String textValue) { + return getTermQueryFromLongValue(getTimeToIndex(textValue)); + } + + @Override + public Query getTermInSetQueryFromTextValues(List textValues) { + List longTerms = new ArrayList<>(textValues.size()); + textValues.forEach((s) -> longTerms.add(getTimeToIndex(s))); + return getTermInSetQueryFromLongValues(longTerms); + } + private long convertDateStringToMillis(String dateString) { if (dateTimeFormat.equals(EPOCH_MILLIS)) { return Long.parseLong(dateString); diff --git a/src/main/java/com/yelp/nrtsearch/server/field/properties/TermQueryable.java b/src/main/java/com/yelp/nrtsearch/server/field/properties/TermQueryable.java index ddaca23ba..17555d19c 100644 --- a/src/main/java/com/yelp/nrtsearch/server/field/properties/TermQueryable.java +++ b/src/main/java/com/yelp/nrtsearch/server/field/properties/TermQueryable.java @@ -16,6 +16,7 @@ package com.yelp.nrtsearch.server.field.properties; import com.yelp.nrtsearch.server.field.FieldDef; +import com.yelp.nrtsearch.server.field.IndexableFieldDef; import com.yelp.nrtsearch.server.grpc.TermInSetQuery; import com.yelp.nrtsearch.server.grpc.TermQuery; import java.util.List; @@ -260,4 +261,21 @@ default Query getTermInSetQueryFromLongValues(List longValues) { default Query getTermInSetQueryFromTextValues(List textValues) { return null; } + + /** + * Verify that this field supports term/term in set queries. + * + * @throws IllegalStateException if term queries are not supported + */ + default void checkTermQueriesSupported() { + if (!(this instanceof IndexableFieldDef indexableFieldDef)) { + throw new IllegalStateException("Instance is not an IndexableFieldDef"); + } + if (!indexableFieldDef.isSearchable()) { + throw new IllegalStateException( + "Field " + + indexableFieldDef.getName() + + " is not searchable, which is required for TermQuery / TermInSetQuery"); + } + } } diff --git a/src/main/java/com/yelp/nrtsearch/server/query/QueryNodeMapper.java b/src/main/java/com/yelp/nrtsearch/server/query/QueryNodeMapper.java index e68162c9f..9498c91b6 100644 --- a/src/main/java/com/yelp/nrtsearch/server/query/QueryNodeMapper.java +++ b/src/main/java/com/yelp/nrtsearch/server/query/QueryNodeMapper.java @@ -294,9 +294,9 @@ private Query getTermQuery(com.yelp.nrtsearch.server.grpc.TermQuery termQuery, I String fieldName = termQuery.getField(); FieldDef fieldDef = state.getFieldOrThrow(fieldName); - if (fieldDef instanceof TermQueryable) { - validateTermQueryIsSearchable(fieldDef); - return ((TermQueryable) fieldDef).getTermQuery(termQuery); + if (fieldDef instanceof TermQueryable termQueryable) { + termQueryable.checkTermQueriesSupported(); + return termQueryable.getTermQuery(termQuery); } String message = @@ -304,24 +304,14 @@ private Query getTermQuery(com.yelp.nrtsearch.server.grpc.TermQuery termQuery, I throw new IllegalArgumentException(String.format(message, termQuery, fieldDef.getType())); } - private void validateTermQueryIsSearchable(FieldDef fieldDef) { - if (fieldDef instanceof IndexableFieldDef - && !((IndexableFieldDef) fieldDef).isSearchable()) { - throw new IllegalStateException( - "Field " - + fieldDef.getName() - + " is not searchable, which is required for TermQuery / TermInSetQuery"); - } - } - private Query getTermInSetQuery( com.yelp.nrtsearch.server.grpc.TermInSetQuery termInSetQuery, IndexState state) { String fieldName = termInSetQuery.getField(); FieldDef fieldDef = state.getFieldOrThrow(fieldName); - if (fieldDef instanceof TermQueryable) { - validateTermQueryIsSearchable(fieldDef); - return ((TermQueryable) fieldDef).getTermInSetQuery(termInSetQuery); + if (fieldDef instanceof TermQueryable termQueryable) { + termQueryable.checkTermQueriesSupported(); + return termQueryable.getTermInSetQuery(termInSetQuery); } String message = diff --git a/src/test/java/com/yelp/nrtsearch/server/field/DateTimeFieldDefTest.java b/src/test/java/com/yelp/nrtsearch/server/field/DateTimeFieldDefTest.java index 72f64ac26..59a5f388c 100644 --- a/src/test/java/com/yelp/nrtsearch/server/field/DateTimeFieldDefTest.java +++ b/src/test/java/com/yelp/nrtsearch/server/field/DateTimeFieldDefTest.java @@ -27,6 +27,8 @@ import com.yelp.nrtsearch.server.grpc.RangeQuery; import com.yelp.nrtsearch.server.grpc.SearchRequest; import com.yelp.nrtsearch.server.grpc.SearchResponse; +import com.yelp.nrtsearch.server.grpc.TermInSetQuery; +import com.yelp.nrtsearch.server.grpc.TermQuery; import io.grpc.StatusRuntimeException; import io.grpc.testing.GrpcCleanupRule; import java.io.IOException; @@ -100,12 +102,21 @@ private AddDocumentRequest buildDocument( "timestamp_string_format", MultiValuedField.newBuilder().addValue(timestampFormatted).build()) .putFields("single_stored", MultiValuedField.newBuilder().addValue(timestampMillis).build()) + .putFields( + "dv_only_single", MultiValuedField.newBuilder().addValue(timestampMillis).build()) + .putFields("stored_only", MultiValuedField.newBuilder().addValue(timestampMillis).build()) .putFields( "multi_stored", MultiValuedField.newBuilder() .addValue(timestampMillis) .addValue(String.valueOf(Long.parseLong(timestampMillis) + 2)) .build()) + .putFields( + "dv_only_multi", + MultiValuedField.newBuilder() + .addValue(timestampMillis) + .addValue(String.valueOf(Long.parseLong(timestampMillis) + 2)) + .build()) .build(); } @@ -565,6 +576,264 @@ private void rangeQueryWithCombinationOfSpecifiedBoundsAndExclusive(String dateF assertRangeQuery(rangeQuery, "3", "4", "6"); } + @Test + public void testTermQuery_searchAndDV() { + TermQuery termQuery = + TermQuery.newBuilder() + .setField("timestamp_epoch_millis") + .setTextValue("1611742000") + .build(); + assertTermQuery(termQuery, "1"); + + termQuery = + TermQuery.newBuilder().setField("timestamp_epoch_millis").setLongValue(1611742000).build(); + assertTermQuery(termQuery, "1"); + } + + @Test + public void testTermQuery_stringFormat() { + TermQuery termQuery = + TermQuery.newBuilder() + .setField("timestamp_string_format") + .setTextValue("2021-02-15 20:20:00") + .build(); + assertTermQuery(termQuery, "3"); + + termQuery = + TermQuery.newBuilder() + .setField("timestamp_string_format") + .setLongValue(1613420400000L) + .build(); + assertTermQuery(termQuery, "3"); + } + + @Test + public void testTermQuery_searchOnlySingle() { + TermQuery termQuery = + TermQuery.newBuilder().setField("single_stored").setTextValue("1611742000").build(); + assertTermQuery(termQuery, "1"); + + termQuery = TermQuery.newBuilder().setField("single_stored").setLongValue(1611742000).build(); + assertTermQuery(termQuery, "1"); + } + + @Test + public void testTermQuery_searchOnlyMulti() { + TermQuery termQuery = + TermQuery.newBuilder().setField("multi_stored").setTextValue("1613742002").build(); + assertTermQuery(termQuery, "4"); + + termQuery = TermQuery.newBuilder().setField("multi_stored").setLongValue(1613742002).build(); + assertTermQuery(termQuery, "4"); + } + + @Test + public void testTermQuery_dvOnlySingle() { + TermQuery termQuery = + TermQuery.newBuilder().setField("dv_only_single").setTextValue("1611742000").build(); + assertTermQuery(termQuery, "1"); + + termQuery = TermQuery.newBuilder().setField("dv_only_single").setLongValue(1611742000).build(); + assertTermQuery(termQuery, "1"); + } + + @Test + public void testTermQuery_dvOnlyMulti() { + TermQuery termQuery = + TermQuery.newBuilder().setField("dv_only_multi").setTextValue("1613742002").build(); + assertTermQuery(termQuery, "4"); + + termQuery = TermQuery.newBuilder().setField("dv_only_multi").setLongValue(1613742002).build(); + assertTermQuery(termQuery, "4"); + } + + @Test + public void testTermQuery_noSearchOrDV() { + try { + TermQuery termQuery = + TermQuery.newBuilder().setField("stored_only").setTextValue("1611742000").build(); + assertTermQuery(termQuery); + fail(); + } catch (StatusRuntimeException e) { + assertTrue( + e.getMessage() + .contains( + "Field \"stored_only\" is not searchable or does not have doc values, which is required for TermQuery / TermInSetQuery")); + } + } + + @Test + public void testTermInSetQuery_searchAndDV() { + TermInSetQuery termInSetQuery = + TermInSetQuery.newBuilder() + .setField("timestamp_epoch_millis") + .setTextTerms( + TermInSetQuery.TextTerms.newBuilder() + .addTerms("1611742000") + .addTerms("1612742000") + .build()) + .build(); + assertTermInSetQuery(termInSetQuery, "1", "3"); + + termInSetQuery = + TermInSetQuery.newBuilder() + .setField("timestamp_epoch_millis") + .setLongTerms( + TermInSetQuery.LongTerms.newBuilder() + .addTerms(1611742000) + .addTerms(1612742000) + .build()) + .build(); + assertTermInSetQuery(termInSetQuery, "1", "3"); + } + + @Test + public void testTermInSetQuery_stringFormat() { + TermInSetQuery termInSetQuery = + TermInSetQuery.newBuilder() + .setField("timestamp_string_format") + .setTextTerms( + TermInSetQuery.TextTerms.newBuilder() + .addTerms("2021-02-15 20:20:00") + .addTerms("2021-03-15 20:20:00") + .build()) + .build(); + assertTermInSetQuery(termInSetQuery, "3", "4"); + + termInSetQuery = + TermInSetQuery.newBuilder() + .setField("timestamp_string_format") + .setLongTerms( + TermInSetQuery.LongTerms.newBuilder() + .addTerms(1613420400000L) + .addTerms(1615839600000L) + .build()) + .build(); + assertTermInSetQuery(termInSetQuery, "3", "4"); + } + + @Test + public void testTermInSetQuery_searchOnlySingle() { + TermInSetQuery termInSetQuery = + TermInSetQuery.newBuilder() + .setField("single_stored") + .setTextTerms( + TermInSetQuery.TextTerms.newBuilder() + .addTerms("1611742000") + .addTerms("1612742000") + .build()) + .build(); + assertTermInSetQuery(termInSetQuery, "1", "3"); + + termInSetQuery = + TermInSetQuery.newBuilder() + .setField("single_stored") + .setLongTerms( + TermInSetQuery.LongTerms.newBuilder() + .addTerms(1611742000) + .addTerms(1612742000) + .build()) + .build(); + assertTermInSetQuery(termInSetQuery, "1", "3"); + } + + @Test + public void testTermInSetQuery_searchOnlyMulti() { + TermInSetQuery termInSetQuery = + TermInSetQuery.newBuilder() + .setField("multi_stored") + .setTextTerms( + TermInSetQuery.TextTerms.newBuilder() + .addTerms("1612742002") + .addTerms("1613742002") + .build()) + .build(); + assertTermInSetQuery(termInSetQuery, "3", "4"); + + termInSetQuery = + TermInSetQuery.newBuilder() + .setField("multi_stored") + .setLongTerms( + TermInSetQuery.LongTerms.newBuilder() + .addTerms(1612742002) + .addTerms(1613742002) + .build()) + .build(); + assertTermInSetQuery(termInSetQuery, "3", "4"); + } + + @Test + public void testTermInSetQuery_dvOnlySingle() { + TermInSetQuery termInSetQuery = + TermInSetQuery.newBuilder() + .setField("dv_only_single") + .setTextTerms( + TermInSetQuery.TextTerms.newBuilder() + .addTerms("1611742000") + .addTerms("1612742000") + .build()) + .build(); + assertTermInSetQuery(termInSetQuery, "1", "3"); + + termInSetQuery = + TermInSetQuery.newBuilder() + .setField("dv_only_single") + .setLongTerms( + TermInSetQuery.LongTerms.newBuilder() + .addTerms(1611742000) + .addTerms(1612742000) + .build()) + .build(); + assertTermInSetQuery(termInSetQuery, "1", "3"); + } + + @Test + public void testTermInSetQuery_dvOnlyMulti() { + TermInSetQuery termInSetQuery = + TermInSetQuery.newBuilder() + .setField("dv_only_multi") + .setTextTerms( + TermInSetQuery.TextTerms.newBuilder() + .addTerms("1612742002") + .addTerms("1613742002") + .build()) + .build(); + assertTermInSetQuery(termInSetQuery, "3", "4"); + + termInSetQuery = + TermInSetQuery.newBuilder() + .setField("dv_only_multi") + .setLongTerms( + TermInSetQuery.LongTerms.newBuilder() + .addTerms(1612742002) + .addTerms(1613742002) + .build()) + .build(); + assertTermInSetQuery(termInSetQuery, "3", "4"); + } + + @Test + public void testTermInSetQuery_noSearchOrDV() { + try { + TermInSetQuery termInSetQuery = + TermInSetQuery.newBuilder() + .setField("stored_only") + .setTextTerms( + TermInSetQuery.TextTerms.newBuilder() + .addTerms("1611742000") + .addTerms("1612742000") + .build()) + .build(); + assertTermInSetQuery(termInSetQuery); + fail(); + } catch (StatusRuntimeException e) { + assertTrue( + e.getMessage() + .contains( + "Field \"stored_only\" is not searchable or does not have doc values, which is required for TermQuery / TermInSetQuery")); + } + } + private void assertRangeQuery(RangeQuery rangeQuery, String... expectedIds) { String idFieldName = "doc_id"; Query query = Query.newBuilder().setRangeQuery(rangeQuery).build(); @@ -582,6 +851,40 @@ private void assertRangeQuery(RangeQuery rangeQuery, String... expectedIds) { assertEquals(expected, actualValues); } + private void assertTermQuery(TermQuery termQuery, String... expectedIds) { + String idFieldName = "doc_id"; + Query query = Query.newBuilder().setTermQuery(termQuery).build(); + SearchResponse searchResponse = doQuery(query, List.of(idFieldName)); + assertEquals(expectedIds.length, searchResponse.getHitsCount()); + List actualValues = + searchResponse.getHitsList().stream() + .map( + hit -> + hit.getFieldsMap().get(idFieldName).getFieldValueList().get(0).getTextValue()) + .sorted() + .collect(Collectors.toList()); + List expected = Arrays.asList(expectedIds); + expected.sort(Comparator.comparing(Function.identity())); + assertEquals(expected, actualValues); + } + + private void assertTermInSetQuery(TermInSetQuery termInSetQuery, String... expectedIds) { + String idFieldName = "doc_id"; + Query query = Query.newBuilder().setTermInSetQuery(termInSetQuery).build(); + SearchResponse searchResponse = doQuery(query, List.of(idFieldName)); + assertEquals(expectedIds.length, searchResponse.getHitsCount()); + List actualValues = + searchResponse.getHitsList().stream() + .map( + hit -> + hit.getFieldsMap().get(idFieldName).getFieldValueList().get(0).getTextValue()) + .sorted() + .collect(Collectors.toList()); + List expected = Arrays.asList(expectedIds); + expected.sort(Comparator.comparing(Function.identity())); + assertEquals(expected, actualValues); + } + private SearchResponse doQuery(Query query, List fields) { return getGrpcServer() .getBlockingStub() diff --git a/src/test/resources/field/registerFieldsDateTime.json b/src/test/resources/field/registerFieldsDateTime.json index 05c4a96d2..91cfbd582 100644 --- a/src/test/resources/field/registerFieldsDateTime.json +++ b/src/test/resources/field/registerFieldsDateTime.json @@ -90,6 +90,25 @@ "search": true, "multiValued": true, "store": true + }, + { + "name": "dv_only_single", + "type": "DATE_TIME", + "dateTimeFormat": "epoch_millis", + "storeDocValues": true + }, + { + "name": "dv_only_multi", + "type": "DATE_TIME", + "dateTimeFormat": "epoch_millis", + "multiValued": true, + "storeDocValues": true + }, + { + "name": "stored_only", + "type": "DATE_TIME", + "dateTimeFormat": "epoch_millis", + "store": true } ] } \ No newline at end of file