diff --git a/dictonaryReqeust.http b/dictonaryReqeust.http index e474278..ae53188 100644 --- a/dictonaryReqeust.http +++ b/dictonaryReqeust.http @@ -12,4 +12,5 @@ Content-Type: application/json POST http://localhost:80/search Content-Type: application/json -{"@type":"GeneralQueryRequest","resourceCredentials":{},"query":{"searchTerm":"breast","includedTags":[],"excludedTags":[],"returnTags":"true","offset":0,"limit":10000000},"resourceUUID":null} \ No newline at end of file +{"@type":"GeneralQueryRequest","resourceCredentials":{},"query":{"searchTerm":"throat sore acute #8","includedTags":[], + "excludedTags":[],"returnTags":"true","offset":0,"limit":100000},"resourceUUID":null} diff --git a/src/main/java/edu/harvard/dbmi/avillach/dictionary/concept/ConceptFilterQueryGenerator.java b/src/main/java/edu/harvard/dbmi/avillach/dictionary/concept/ConceptFilterQueryGenerator.java index 5388c40..fd35437 100644 --- a/src/main/java/edu/harvard/dbmi/avillach/dictionary/concept/ConceptFilterQueryGenerator.java +++ b/src/main/java/edu/harvard/dbmi/avillach/dictionary/concept/ConceptFilterQueryGenerator.java @@ -82,37 +82,7 @@ public QueryParamPair generateFilterQuery(Filter filter, Pageable pageable) { params.addValue("consents", filter.consents()); } clauses.add(createValuelessNodeFilter(filter.search(), filter.consents())); - - - String query = "(\n" + String.join("\n\tINTERSECT\n", clauses) + "\n)"; - String superQuery = """ - WITH q AS ( - %s - ) - %s - SELECT q.concept_node_id AS concept_node_id, max((1 + rank) * coalesce(rank_adjustment, 1)) AS rank - FROM q - LEFT JOIN allow_filtering ON allow_filtering.concept_node_id = q.concept_node_id - GROUP BY q.concept_node_id - ORDER BY max((1 + rank) * coalesce(rank_adjustment, 1)) DESC, q.concept_node_id ASC - """.formatted(query, RANK_ADJUSTMENTS); - // explanation of ORDER BY max((1 + rank) * coalesce(rank_adjustment, 1)) DESC - // you want to sort the best matches first, BUT anything that is marked as unfilterable should be put last - // coalesce will return the first non null value; this solves rows that aren't marked as filterable or not - // I then multiply that by 1 + rank instead of just rank so that a rank value of 0 for an unfilterable var - // is placed below a rank value of 0 for a filterable var - // Finally, I add the concept node id to the sort to keep it stable for ties, otherwise pagination gets weird - - if (pageable.isPaged()) { - superQuery = superQuery + """ - LIMIT :limit - OFFSET :offset - """; - params.addValue("limit", pageable.getPageSize()).addValue("offset", pageable.getOffset()); - } - - superQuery = " concepts_filtered_sorted AS (\n" + superQuery + "\n)"; - + String superQuery = getSuperQuery(pageable, clauses, params); return new QueryParamPair(superQuery, params); } @@ -184,4 +154,78 @@ facet.name IN (:facets_for_category_%s ) AND facet_category.name = :category_%s }).toList(); } + public QueryParamPair generateLegacyFilterQuery(Filter filter, Pageable pageable) { + MapSqlParameterSource params = new MapSqlParameterSource(); + params.addValue("disallowed_meta_keys", disallowedMetaFields); + List clauses = new java.util.ArrayList<>(List.of()); + if (StringUtils.hasLength(filter.search())) { + params.addValue("dynamic_tsquery", filter.search().trim()); + } + clauses.add(createDynamicValuelessNodeFilter(filter.search())); + String superQuery = getSuperQuery(pageable, clauses, params); + + return new QueryParamPair(superQuery, params); + } + + private String createDynamicValuelessNodeFilter(String search) { + String rankQuery = "0 as rank"; + String rankWhere = ""; + if (StringUtils.hasLength(search)) { + rankQuery = "ts_rank(searchable_fields, to_tsquery(:dynamic_tsquery)) AS rank"; + rankWhere = "concept_node.searchable_fields @@ to_tsquery(:dynamic_tsquery) AND"; + } + return """ + SELECT + concept_node.concept_node_id, + %s + FROM + concept_node + LEFT JOIN dataset ON concept_node.dataset_id = dataset.dataset_id + LEFT JOIN concept_node_meta AS continuous_min ON concept_node.concept_node_id = continuous_min.concept_node_id AND continuous_min.KEY = 'min' + LEFT JOIN concept_node_meta AS continuous_max ON concept_node.concept_node_id = continuous_max.concept_node_id AND continuous_max.KEY = 'max' + LEFT JOIN concept_node_meta AS categorical_values ON concept_node.concept_node_id = categorical_values.concept_node_id AND categorical_values.KEY = 'values' + WHERE + %s + %s + ( + continuous_min.value <> '' OR + continuous_max.value <> '' OR + categorical_values.value <> '' + ) + """ + .formatted(rankQuery, rankWhere, ""); + } + + private static String getSuperQuery(Pageable pageable, List clauses, MapSqlParameterSource params) { + String query = "(\n" + String.join("\n\tINTERSECT\n", clauses) + "\n)"; + String superQuery = """ + WITH q AS ( + %s + ) + %s + SELECT q.concept_node_id AS concept_node_id, max((1 + rank) * coalesce(rank_adjustment, 1)) AS rank + FROM q + LEFT JOIN allow_filtering ON allow_filtering.concept_node_id = q.concept_node_id + GROUP BY q.concept_node_id + ORDER BY max((1 + rank) * coalesce(rank_adjustment, 1)) DESC, q.concept_node_id ASC + """.formatted(query, RANK_ADJUSTMENTS); + // explanation of ORDER BY max((1 + rank) * coalesce(rank_adjustment, 1)) DESC + // you want to sort the best matches first, BUT anything that is marked as unfilterable should be put last + // coalesce will return the first non null value; this solves rows that aren't marked as filterable or not + // I then multiply that by 1 + rank instead of just rank so that a rank value of 0 for an unfilterable var + // is placed below a rank value of 0 for a filterable var + // Finally, I add the concept node id to the sort to keep it stable for ties, otherwise pagination gets weird + + if (pageable.isPaged()) { + superQuery = superQuery + """ + LIMIT :limit + OFFSET :offset + """; + params.addValue("limit", pageable.getPageSize()).addValue("offset", pageable.getOffset()); + } + + superQuery = " concepts_filtered_sorted AS (\n" + superQuery + "\n)"; + return superQuery; + } + } diff --git a/src/main/java/edu/harvard/dbmi/avillach/dictionary/legacysearch/LegacySearchQueryMapper.java b/src/main/java/edu/harvard/dbmi/avillach/dictionary/legacysearch/LegacySearchQueryMapper.java index 001436c..8a23d3c 100644 --- a/src/main/java/edu/harvard/dbmi/avillach/dictionary/legacysearch/LegacySearchQueryMapper.java +++ b/src/main/java/edu/harvard/dbmi/avillach/dictionary/legacysearch/LegacySearchQueryMapper.java @@ -3,23 +3,25 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import edu.harvard.dbmi.avillach.dictionary.filter.Filter; -import edu.harvard.dbmi.avillach.dictionary.filter.FilterProcessor; import edu.harvard.dbmi.avillach.dictionary.legacysearch.model.LegacySearchQuery; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.data.domain.PageRequest; import org.springframework.stereotype.Component; import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import java.util.stream.Collectors; @Component public class LegacySearchQueryMapper { private static final ObjectMapper objectMapper = new ObjectMapper(); - private final FilterProcessor filterProcessor; + private static final Logger log = LoggerFactory.getLogger(LegacySearchQueryMapper.class); - public LegacySearchQueryMapper(FilterProcessor filterProcessor) { - this.filterProcessor = filterProcessor; - } + public LegacySearchQueryMapper() {} public LegacySearchQuery mapFromJson(String jsonString) throws IOException { JsonNode rootNode = objectMapper.readTree(jsonString); @@ -27,8 +29,33 @@ public LegacySearchQuery mapFromJson(String jsonString) throws IOException { String searchTerm = queryNode.get("searchTerm").asText(); int limit = queryNode.get("limit").asInt(); - Filter filter = filterProcessor.processsFilter(new Filter(List.of(), searchTerm, List.of())); - return new LegacySearchQuery(filter, PageRequest.of(0, limit)); + searchTerm = constructTsQuery(searchTerm); + log.debug("Constructed Search Term: {}", searchTerm); + return new LegacySearchQuery(new Filter(List.of(), searchTerm, List.of()), PageRequest.of(0, limit)); + } + + // An attempt to provide OR search that will produce similar results to legacy search-prototype + private String constructTsQuery(String searchTerm) { + // Split on the | to enable or queries + String[] orGroups = searchTerm.split("\\|"); + List orClauses = new ArrayList<>(); + + for (String group : orGroups) { + // To replicate legacy search we will split using its regex [\\s\\p{Punct}]+ + String[] tokens = group.trim().split("[\\s\\p{Punct}]+"); + + // Now we will combine the tokens in this group and '&' them together. + String andClause = Arrays.stream(tokens).filter(token -> !token.isBlank()) // remove empty tokens. + .map(token -> token + ":*") // add the wild card for search + .collect(Collectors.joining(" & ")); + + if (!andClause.isBlank()) { + orClauses.add(andClause); + } + } + + + return String.join(" | ", orClauses); } } diff --git a/src/main/java/edu/harvard/dbmi/avillach/dictionary/legacysearch/LegacySearchRepository.java b/src/main/java/edu/harvard/dbmi/avillach/dictionary/legacysearch/LegacySearchRepository.java index 9ea09c4..4ecdea3 100644 --- a/src/main/java/edu/harvard/dbmi/avillach/dictionary/legacysearch/LegacySearchRepository.java +++ b/src/main/java/edu/harvard/dbmi/avillach/dictionary/legacysearch/LegacySearchRepository.java @@ -35,7 +35,7 @@ public LegacySearchRepository( } public List getLegacySearchResults(Filter filter, Pageable pageable) { - QueryParamPair filterQ = filterGen.generateFilterQuery(filter, pageable); + QueryParamPair filterQ = filterGen.generateLegacyFilterQuery(filter, pageable); String sql = ALLOW_FILTERING_Q + ", " + filterQ.query() + """ SELECT concept_node.concept_path AS conceptPath, concept_node.display AS display, diff --git a/src/test/java/edu/harvard/dbmi/avillach/dictionary/legacysearch/LegacySearchControllerIntegrationTest.java b/src/test/java/edu/harvard/dbmi/avillach/dictionary/legacysearch/LegacySearchControllerIntegrationTest.java index d68a4f4..b939684 100644 --- a/src/test/java/edu/harvard/dbmi/avillach/dictionary/legacysearch/LegacySearchControllerIntegrationTest.java +++ b/src/test/java/edu/harvard/dbmi/avillach/dictionary/legacysearch/LegacySearchControllerIntegrationTest.java @@ -54,4 +54,38 @@ void shouldGetLegacyResponseByStudyID() throws IOException { searchResults.forEach(searchResult -> Assertions.assertEquals("phs000007", searchResult.result().studyId())); } + @Test + void shouldHandleORRequest() throws IOException { + String jsonString = """ + {"query":{"searchTerm":"age","includedTags":[],"excludedTags":[],"returnTags":"true","offset":0,"limit":100}} + """; + + ResponseEntity legacyResponseResponseEntity = legacySearchController.legacySearch(jsonString); + Assertions.assertEquals(HttpStatus.OK, legacyResponseResponseEntity.getStatusCode()); + LegacyResponse legacyResponseBody = legacyResponseResponseEntity.getBody(); + Assertions.assertNotNull(legacyResponseBody); + Results results = legacyResponseBody.results(); + List ageSearchResults = results.searchResults(); + Assertions.assertEquals(4, ageSearchResults.size()); + + jsonString = """ + {"query":{"searchTerm":"physical|age","includedTags":[],"excludedTags":[],"returnTags":"true","offset":0,"limit":100}} + """; + + legacyResponseResponseEntity = legacySearchController.legacySearch(jsonString); + Assertions.assertEquals(HttpStatus.OK, legacyResponseResponseEntity.getStatusCode()); + legacyResponseBody = legacyResponseResponseEntity.getBody(); + Assertions.assertNotNull(legacyResponseBody); + results = legacyResponseBody.results(); + List physicalORAgeSearchResults = results.searchResults(); + Assertions.assertEquals(5, physicalORAgeSearchResults.size()); + + // Verify that age|physical has expanded the search results + Assertions.assertNotEquals(ageSearchResults.size(), physicalORAgeSearchResults.size()); + + // Verify the OR statement has more results + Assertions.assertTrue(ageSearchResults.size() < physicalORAgeSearchResults.size()); + } + + } diff --git a/src/test/java/edu/harvard/dbmi/avillach/dictionary/legacysearch/LegacySearchQueryMapperTest.java b/src/test/java/edu/harvard/dbmi/avillach/dictionary/legacysearch/LegacySearchQueryMapperTest.java index e0661ab..6c8cfa2 100644 --- a/src/test/java/edu/harvard/dbmi/avillach/dictionary/legacysearch/LegacySearchQueryMapperTest.java +++ b/src/test/java/edu/harvard/dbmi/avillach/dictionary/legacysearch/LegacySearchQueryMapperTest.java @@ -28,12 +28,12 @@ void shouldParseSearchRequest() throws IOException { Filter filter = legacySearchQuery.filter(); Pageable pageable = legacySearchQuery.pageable(); - Assertions.assertEquals("age", filter.search()); + Assertions.assertEquals("age:*", filter.search()); Assertions.assertEquals(100, pageable.getPageSize()); } @Test - void shouldReplaceUnderscore() throws IOException { + void shouldHandlePunct() throws IOException { String jsonString = """ {"query":{"searchTerm":"tutorial-biolincc_digitalis","includedTags":[],"excludedTags":[],"returnTags":"true","offset":0,"limit":100}} @@ -43,7 +43,35 @@ void shouldReplaceUnderscore() throws IOException { Filter filter = legacySearchQuery.filter(); Pageable pageable = legacySearchQuery.pageable(); - Assertions.assertEquals("tutorial-biolincc/digitalis", filter.search()); + Assertions.assertEquals("tutorial:* & biolincc:* & digitalis:*", filter.search()); + Assertions.assertEquals(100, pageable.getPageSize()); + } + + @Test + void shouldHandleOR() throws IOException { + String jsonString = """ + {"query":{"searchTerm":"sex|gender","includedTags":[],"excludedTags":[],"returnTags":"true","offset":0,"limit":100}} + """; + + LegacySearchQuery legacySearchQuery = legacySearchQueryMapper.mapFromJson(jsonString); + Filter filter = legacySearchQuery.filter(); + Pageable pageable = legacySearchQuery.pageable(); + + Assertions.assertEquals("sex:* | gender:*", filter.search()); + Assertions.assertEquals(100, pageable.getPageSize()); + } + + @Test + void shouldHandleORAndPunct() throws IOException { + String jsonString = """ + {"query":{"searchTerm":"sex|gender age","includedTags":[],"excludedTags":[],"returnTags":"true","offset":0,"limit":100}} + """; + + LegacySearchQuery legacySearchQuery = legacySearchQueryMapper.mapFromJson(jsonString); + Filter filter = legacySearchQuery.filter(); + Pageable pageable = legacySearchQuery.pageable(); + + Assertions.assertEquals("sex:* | gender:* & age:*", filter.search()); Assertions.assertEquals(100, pageable.getPageSize()); }