From f7b65ebb6803a11e35fb49bcf2c30a7042dd1ffd Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Mon, 16 Dec 2024 15:45:56 -0800 Subject: [PATCH] Fix #issue-13914-: The persona JSON schema is named Team --- .../service/search/SearchQueryBuilder.java | 64 ++++ .../elasticsearch/ElasticSearchClient.java | 126 +------ .../ElasticSearchQueryBuilder.java | 332 ++++++++++++++++++ .../data/searchSettings/searchSettings.json | 27 +- .../schema/configuration/searchSettings.json | 22 ++ .../generated/configuration/searchSettings.ts | 261 +++++++++++++- 6 files changed, 713 insertions(+), 119 deletions(-) create mode 100644 openmetadata-service/src/main/java/org/openmetadata/service/search/SearchQueryBuilder.java create mode 100644 openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchQueryBuilder.java diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchQueryBuilder.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchQueryBuilder.java new file mode 100644 index 000000000000..434756708c50 --- /dev/null +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchQueryBuilder.java @@ -0,0 +1,64 @@ +package org.openmetadata.service.search; + +import es.org.elasticsearch.search.builder.SearchSourceBuilder; +import org.openmetadata.schema.api.search.SearchSettings; +import org.openmetadata.schema.settings.SettingsType; +import org.openmetadata.service.Entity; +import org.openmetadata.service.resources.settings.SettingsCache; + +import java.io.IOException; + +public interface SearchQueryBuilder { + SearchSourceBuilder getSearchSourceBuilder(String index, String q, int from, int size) throws IOException; + void applyDeletedLogic(SearchRequest request, SearchSourceBuilder searchSourceBuilder); + void applyGlossaryHierarchyLogic(SearchRequest request, SearchSourceBuilder searchSourceBuilder); + default String getAssetTypeFromIndex(String index) { + return switch (index) { + case "topic_search_index", "topic" -> "topic"; + case "dashboard_search_index", "dashboard" -> "dashboard"; + case "pipeline_search_index", "pipeline" -> "pipeline"; + case "mlmodel_search_index", "mlmodel" -> "mlmodel"; + case "table_search_index", "table" -> "table"; + case "database_schema_search_index", + "databaseSchema", + "database_search_index", + "database" -> "dataAsset"; + case "user_search_index", "user", "team_search_index", "team" -> "user"; + case "glossary_term_search_index", "glossaryTerm" -> "glossaryTerm"; + case "tag_search_index", "tag" -> "tag"; + case "container_search_index", "container" -> "container"; + case "query_search_index", "query" -> "query"; + case "test_case_search_index", + "testCase", + "test_suite_search_index", + "testSuite" -> "testCase"; + case "stored_procedure_search_index", "storedProcedure" -> "storedProcedure"; + case "dashboard_data_model_search_index", + "dashboardDataModel" -> "dashboardDataModel"; + case "search_entity_search_index", "searchIndex" -> "searchIndex"; + case "domain_search_index", "domain" -> "domain"; + case "raw_cost_analysis_report_data_index" -> "rawCostAnalysisReportData"; + case "aggregated_cost_analysis_report_data_index" -> "aggregatedCostAnalysisReportData"; + case "data_product_search_index" -> "dataProduct"; + case "test_case_resolution_status_search_index" -> "test_case_resolution_status_search_index"; + case "test_case_result_search_index" -> "testCase"; + case "api_endpoint_search_index", "apiEndpoint" -> "apiEndpoint"; + case "api_service_search_index", + "mlmodel_service_search_index", + "database_service_search_index", + "messaging_service_index", + "dashboard_service_index", + "pipeline_service_index", + "storage_service_index", + "search_service_index", + "metadata_service_index" -> "default"; + case "dataAsset" -> "dataAsset"; + case "all" -> "all"; + default -> "dataAsset"; + }; + } + + default SearchSettings getSearchSettings() { + return SettingsCache.getSetting(SettingsType.SEARCH_SETTINGS, SearchSettings.class); + } +} diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchClient.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchClient.java index fec119ce0a46..93364f96b84e 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchClient.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchClient.java @@ -166,6 +166,7 @@ import org.openmetadata.service.search.SearchAggregation; import org.openmetadata.service.search.SearchClient; import org.openmetadata.service.search.SearchIndexUtils; +import org.openmetadata.service.search.SearchQueryBuilder; import org.openmetadata.service.search.SearchRequest; import org.openmetadata.service.search.SearchSortFilter; import org.openmetadata.service.search.UpdateSearchEventsConstant; @@ -244,6 +245,8 @@ public class ElasticSearchClient implements SearchClient { Stream.concat(FIELDS_TO_REMOVE.stream(), Stream.of("schemaDefinition", "customMetrics")) .toList(); + private final SearchQueryBuilder searchQueryBuilder = new ElasticSearchQueryBuilder(); + static { SearchModule searchModule = new SearchModule(Settings.EMPTY, false, List.of()); xContentRegistry = new NamedXContentRegistry(searchModule.getNamedXContents()); @@ -366,9 +369,9 @@ public void deleteIndex(IndexMapping indexMapping) { @Override public Response search(SearchRequest request, SubjectContext subjectContext) throws IOException { + // Instead of using the old large switch-case, we now rely on SearchQueryBuilder to use searchSettings SearchSourceBuilder searchSourceBuilder = - getSearchSourceBuilder( - request.getIndex(), request.getQuery(), request.getFrom(), request.getSize()); + searchQueryBuilder.getSearchSourceBuilder(request.getIndex(), request.getQuery(), request.getFrom(), request.getSize()); buildSearchRBACQuery(subjectContext, searchSourceBuilder); @@ -393,62 +396,10 @@ public Response search(SearchRequest request, SubjectContext subjectContext) thr searchSourceBuilder.searchAfter(request.getSearchAfter()); } - /* For backward-compatibility we continue supporting the deleted argument, this should be removed in future versions */ - if (request - .getIndex() - .equalsIgnoreCase(Entity.getSearchRepository().getIndexOrAliasName(GLOBAL_SEARCH_ALIAS)) - || request - .getIndex() - .equalsIgnoreCase(Entity.getSearchRepository().getIndexOrAliasName("dataAsset"))) { - es.org.elasticsearch.index.query.BoolQueryBuilder boolQueryBuilder = - QueryBuilders.boolQuery(); - boolQueryBuilder.should( - QueryBuilders.boolQuery() - .must(searchSourceBuilder.query()) - .must(QueryBuilders.existsQuery("deleted")) - .must(QueryBuilders.termQuery("deleted", request.isDeleted()))); - boolQueryBuilder.should( - QueryBuilders.boolQuery() - .must(searchSourceBuilder.query()) - .mustNot(QueryBuilders.existsQuery("deleted"))); - searchSourceBuilder.query(boolQueryBuilder); - } else if (request - .getIndex() - .equalsIgnoreCase( - Entity.getSearchRepository().getIndexMapping(DOMAIN).getIndexName(clusterAlias)) - || request - .getIndex() - .equalsIgnoreCase( - Entity.getSearchRepository() - .getIndexMapping(DATA_PRODUCT) - .getIndexName(clusterAlias)) - || request - .getIndex() - .equalsIgnoreCase( - Entity.getSearchRepository().getIndexMapping(QUERY).getIndexName(clusterAlias)) - || request - .getIndex() - .equalsIgnoreCase( - Entity.getSearchRepository().getIndexOrAliasName("knowledge_page_search_index")) - || request - .getIndex() - .equalsIgnoreCase( - Entity.getSearchRepository() - .getIndexMapping(RAW_COST_ANALYSIS_REPORT_DATA) - .getIndexName(clusterAlias)) - || request - .getIndex() - .equalsIgnoreCase( - Entity.getSearchRepository() - .getIndexMapping(AGGREGATED_COST_ANALYSIS_REPORT_DATA) - .getIndexName(clusterAlias))) { - searchSourceBuilder.query(QueryBuilders.boolQuery().must(searchSourceBuilder.query())); - } else { - searchSourceBuilder.query( - QueryBuilders.boolQuery() - .must(searchSourceBuilder.query()) - .must(QueryBuilders.termQuery("deleted", request.isDeleted()))); - } + // Deleted filtering logic can be integrated into SearchQueryBuilder if needed, + // or remain here for backward compatibility: + // Apply deleted logic for backward-compatibility + searchQueryBuilder.applyDeletedLogic(request, searchSourceBuilder); if (!nullOrEmpty(request.getSortFieldParam()) && !request.isGetHierarchy()) { FieldSortBuilder fieldSortBuilder = @@ -461,62 +412,9 @@ public Response search(SearchRequest request, SubjectContext subjectContext) thr searchSourceBuilder.sort(fieldSortBuilder); } - if (request - .getIndex() - .equalsIgnoreCase( - Entity.getSearchRepository() - .getIndexMapping(GLOSSARY_TERM) - .getIndexName(clusterAlias))) { - searchSourceBuilder.query(QueryBuilders.boolQuery().must(searchSourceBuilder.query())); - - if (request.isGetHierarchy()) { - QueryBuilder baseQuery = - QueryBuilders.boolQuery() - .should(searchSourceBuilder.query()) - .should(QueryBuilders.matchPhraseQuery("fullyQualifiedName", request.getQuery())) - .should(QueryBuilders.matchPhraseQuery("name", request.getQuery())) - .should(QueryBuilders.matchPhraseQuery("displayName", request.getQuery())) - .should( - QueryBuilders.matchPhraseQuery( - "glossary.fullyQualifiedName", request.getQuery())) - .should(QueryBuilders.matchPhraseQuery("glossary.displayName", request.getQuery())) - .must(QueryBuilders.matchQuery("status", "Approved")) - .minimumShouldMatch(1); - searchSourceBuilder.query(baseQuery); - - SearchResponse searchResponse = - client.search( - new es.org.elasticsearch.action.search.SearchRequest(request.getIndex()) - .source(searchSourceBuilder), - RequestOptions.DEFAULT); - - // Extract parent terms from aggregation - BoolQueryBuilder parentTermQueryBuilder = QueryBuilders.boolQuery(); - Terms parentTerms = searchResponse.getAggregations().get("fqnParts_agg"); - - // Build es query to get parent terms for the user input query , to build correct hierarchy - if (!parentTerms.getBuckets().isEmpty() && !request.getQuery().equals("*")) { - parentTerms.getBuckets().stream() - .map(Terms.Bucket::getKeyAsString) - .forEach( - parentTerm -> - parentTermQueryBuilder.should( - QueryBuilders.matchQuery("fullyQualifiedName", parentTerm))); - - searchSourceBuilder.query( - parentTermQueryBuilder - .minimumShouldMatch(1) - .must(QueryBuilders.matchQuery("status", "Approved"))); - } - searchSourceBuilder.sort(SortBuilders.fieldSort("fullyQualifiedName").order(SortOrder.ASC)); - } - } + searchQueryBuilder.applyGlossaryHierarchyLogic(request, searchSourceBuilder); - /* for performance reasons ElasticSearch doesn't provide accurate hits - if we enable trackTotalHits parameter it will try to match every result, count and return hits - however in most cases for search results an approximate value is good enough. - we are displaying total entity counts in landing page and explore page where we need the total count - https://github.com/elastic/elasticsearch/issues/33028 */ + // fetch source and track hits searchSourceBuilder.fetchSource( new FetchSourceContext( request.isFetchSource(), @@ -526,7 +424,7 @@ public Response search(SearchRequest request, SubjectContext subjectContext) thr if (request.isTrackTotalHits()) { searchSourceBuilder.trackTotalHits(true); } else { - searchSourceBuilder.trackTotalHitsUpTo(MAX_RESULT_HITS); + searchSourceBuilder.trackTotalHitsUpTo(searchQueryBuilder.getSearchSettings().getGlobalSettings().getMaxResultHits()); } searchSourceBuilder.timeout(new TimeValue(30, TimeUnit.SECONDS)); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchQueryBuilder.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchQueryBuilder.java new file mode 100644 index 000000000000..2289eb4cf30c --- /dev/null +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchQueryBuilder.java @@ -0,0 +1,332 @@ +package org.openmetadata.service.search.elasticsearch; + +import es.org.elasticsearch.common.lucene.search.function.CombineFunction; +import es.org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction; +import es.org.elasticsearch.common.lucene.search.function.FunctionScoreQuery; +import es.org.elasticsearch.index.query.BoolQueryBuilder; +import es.org.elasticsearch.index.query.Operator; +import es.org.elasticsearch.index.query.QueryBuilders; +import es.org.elasticsearch.index.query.QueryBuilder; +import es.org.elasticsearch.index.query.RangeQueryBuilder; +import es.org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; +import es.org.elasticsearch.index.query.functionscore.ScoreFunctionBuilder; +import es.org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders; +import es.org.elasticsearch.search.aggregations.AggregationBuilder; +import es.org.elasticsearch.search.aggregations.AggregationBuilders; +import es.org.elasticsearch.search.builder.SearchSourceBuilder; +import es.org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; +import es.org.elasticsearch.search.sort.SortBuilders; +import es.org.elasticsearch.search.sort.SortOrder; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.openmetadata.schema.api.search.Aggregation; +import org.openmetadata.schema.api.search.FieldValueBoost; +import org.openmetadata.schema.api.search.Range; +import org.openmetadata.schema.api.search.TagBoost; +import org.openmetadata.schema.entity.data.EntityHierarchy__1; +import org.openmetadata.schema.settings.SettingsType; +import org.openmetadata.schema.api.search.SearchSettings; +import org.openmetadata.schema.api.search.GlobalSettings; +import org.openmetadata.schema.api.search.AssetTypeConfiguration; +import org.openmetadata.service.Entity; +import org.openmetadata.service.resources.settings.SettingsCache; +import org.openmetadata.service.search.SearchRequest; +import org.openmetadata.service.search.SearchQueryBuilder; +import org.openmetadata.service.util.JsonUtils; + +public class ElasticSearchQueryBuilder implements SearchQueryBuilder { + + @Override + public SearchSourceBuilder getSearchSourceBuilder(String index, String q, int from, int size) throws IOException { + // Instead of reading from a JSON file or cached file, + // we get the SearchSettings directly from SettingsCache. + SearchSettings searchSettings = SettingsCache.getSetting(SettingsType.SEARCH_SETTINGS, SearchSettings.class); + + String assetType = getAssetTypeFromIndex(index); + AssetTypeConfiguration config = + searchSettings.getAssetTypeConfigurations().stream() + .filter(c -> c.getAssetType().equalsIgnoreCase(assetType)) + .findFirst() + .orElse(searchSettings.getDefaultConfiguration()); + if (config == null) { + config = searchSettings.getDefaultConfiguration(); + } + + return buildSearchSourceBuilder(q, from, size, config, searchSettings); + } + + private SearchSourceBuilder buildSearchSourceBuilder( + String query, int from, int size, AssetTypeConfiguration config, SearchSettings searchSettings) { + + QueryBuilder finalQuery = buildQuery(query, config, searchSettings); + HighlightBuilder hb = buildHighlights(config.getHighlightFields(), searchSettings.getGlobalSettings()); + + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder() + .query(finalQuery) + .from(from) + .size(size) + .highlighter(hb); + + // Add asset-type specific aggregations + if (config.getAggregations() != null) { + for (Aggregation aggConfig : config.getAggregations()) { + AggregationBuilder aggBuilder = buildAggregation(aggConfig, searchSettings); + if (aggBuilder != null) { + searchSourceBuilder.aggregation(aggBuilder); + } + } + } + + // Add global aggregations + GlobalSettings globalSettings = searchSettings.getGlobalSettings(); + if (globalSettings != null && globalSettings.getAggregations() != null) { + for (Aggregation aggConfig : globalSettings.getAggregations()) { + AggregationBuilder aggBuilder = buildAggregation(aggConfig, searchSettings); + if (aggBuilder != null) { + searchSourceBuilder.aggregation(aggBuilder); + } + } + } + + return searchSourceBuilder; + } + + private QueryBuilder buildQuery(String query, AssetTypeConfiguration config, SearchSettings searchSettings) { + BoolQueryBuilder boolQuery = QueryBuilders.boolQuery(); + + // Must match + if (config.getMustMatch() != null && !config.getMustMatch().isEmpty()) { + Map mustFields = new HashMap<>(); + for (String field : config.getMustMatch()) { + mustFields.put(field, config.getFields().getAdditionalProperties().getOrDefault(field, 1.0).floatValue()); + } + if (!mustFields.isEmpty()) { + QueryBuilder mustQuery = QueryBuilders.queryStringQuery(query) + .fields(mustFields) + .defaultOperator(Operator.AND); + boolQuery.must(mustQuery); + } + } + + // Should match + if (config.getShouldMatch() != null && !config.getShouldMatch().isEmpty()) { + Map shouldFields = new HashMap<>(); + for (String field : config.getShouldMatch()) { + shouldFields.put(field, config.getFields().getAdditionalProperties().getOrDefault(field, 1.0).floatValue()); + } + if (!shouldFields.isEmpty()) { + QueryBuilder shouldQuery = QueryBuilders.queryStringQuery(query) + .fields(shouldFields) + .defaultOperator(Operator.OR); + boolQuery.should(shouldQuery); + } + } + + // Apply tier boosts, usage boosts, etc. if needed + return applyBoosts(boolQuery, searchSettings); + } + + private FunctionScoreQueryBuilder applyBoosts(QueryBuilder baseQuery, SearchSettings searchSettings) { + List functions = new ArrayList<>(); + + // Apply tagBoosts dynamically + if (searchSettings.getGlobalSettings() != null && searchSettings.getGlobalSettings().getTagBoosts() != null) { + for (TagBoost tagBoost : searchSettings.getGlobalSettings().getTagBoosts()) { + QueryBuilder filterQuery = QueryBuilders.termQuery(tagBoost.getField(), tagBoost.getTagFQN()); + functions.add(new FunctionScoreQueryBuilder.FilterFunctionBuilder( + filterQuery, + ScoreFunctionBuilders.weightFactorFunction(tagBoost.getBoost().floatValue()) + )); + } + } + + // Apply fieldValueBoosts dynamically using buildRangeQuery + if (searchSettings.getGlobalSettings() != null && searchSettings.getGlobalSettings().getFieldValueBoosts() != null) { + for (FieldValueBoost fvb : searchSettings.getGlobalSettings().getFieldValueBoosts()) { + Range range = fvb.getCondition().getRange(); // returns a Range object + QueryBuilder filterQuery = buildRangeQuery(fvb.getField(), range); + + FieldValueFactorFunction.Modifier modifier = FieldValueFactorFunction.Modifier.valueOf(fvb.getModifier().value().toUpperCase()); + ScoreFunctionBuilder scoreFunction = ScoreFunctionBuilders.fieldValueFactorFunction(fvb.getField()) + .factor(fvb.getFactor().floatValue()) + .modifier(modifier) + .missing(fvb.getMissing().floatValue()); + + functions.add(new FunctionScoreQueryBuilder.FilterFunctionBuilder(filterQuery, scoreFunction)); + } + } + + // Combine all boosts + if (!functions.isEmpty()) { + return QueryBuilders.functionScoreQuery( + baseQuery, + functions.toArray(new FunctionScoreQueryBuilder.FilterFunctionBuilder[0])) + .scoreMode(FunctionScoreQuery.ScoreMode.SUM) + .boostMode(CombineFunction.MULTIPLY); + } else { + return QueryBuilders.functionScoreQuery(baseQuery); + } + } + + private QueryBuilder buildRangeQuery(String field, Range range) { + RangeQueryBuilder rangeQuery = QueryBuilders.rangeQuery(field); + + if (range != null) { + if (range.getGt() != null) { + rangeQuery.gt(range.getGt()); + } + if (range.getGte() != null) { + rangeQuery.gte(range.getGte()); + } + if (range.getLt() != null) { + rangeQuery.lt(range.getLt()); + } + if (range.getLte() != null) { + rangeQuery.lte(range.getLte()); + } + } + + return rangeQuery; + } + + private HighlightBuilder buildHighlights(List highlightFields, GlobalSettings globalSettings) { + HighlightBuilder hb = new HighlightBuilder(); + if (highlightFields != null) { + for (String field : highlightFields) { + hb.field(new HighlightBuilder.Field(field).highlighterType("unified")); + } + } + hb.preTags(""); + hb.postTags(""); + if (globalSettings != null) { + hb.maxAnalyzedOffset(globalSettings.getMaxAnalyzedOffset()); + } + return hb; + } + + private AggregationBuilder buildAggregation(Aggregation aggConfig, SearchSettings searchSettings) { + if ("terms".equals(aggConfig.getType())) { + es.org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder termsAgg = + AggregationBuilders.terms(aggConfig.getName()) + .field(aggConfig.getField()); + + if (searchSettings.getGlobalSettings() != null) { + termsAgg.size(searchSettings.getGlobalSettings().getMaxAggregateSize()); + } + + return termsAgg; + } + return null; + } + + @Override + public void applyDeletedLogic(SearchRequest request, SearchSourceBuilder searchSourceBuilder) { + String index = request.getIndex(); + QueryBuilder currentQuery = searchSourceBuilder.query(); + + if (index.equalsIgnoreCase(Entity.getSearchRepository().getIndexOrAliasName("all")) + || index.equalsIgnoreCase(Entity.getSearchRepository().getIndexOrAliasName("dataAsset"))) { + BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); + boolQueryBuilder.should( + QueryBuilders.boolQuery() + .must(currentQuery) + .must(QueryBuilders.existsQuery("deleted")) + .must(QueryBuilders.termQuery("deleted", request.isDeleted()))); + boolQueryBuilder.should( + QueryBuilders.boolQuery() + .must(currentQuery) + .mustNot(QueryBuilders.existsQuery("deleted"))); + searchSourceBuilder.query(boolQueryBuilder); + } else { + searchSourceBuilder.query( + QueryBuilders.boolQuery() + .must(currentQuery) + .must(QueryBuilders.termQuery("deleted", request.isDeleted()))); + } + } + + @Override + public void applyGlossaryHierarchyLogic(SearchRequest request, SearchSourceBuilder searchSourceBuilder) { + if (request + .getIndex() + .equalsIgnoreCase(Entity.getSearchRepository().getIndexMapping(Entity.GLOSSARY_TERM).getIndexName(""))) { + searchSourceBuilder.query(QueryBuilders.boolQuery().must(searchSourceBuilder.query())); + + if (request.isGetHierarchy()) { + BoolQueryBuilder baseQuery = + QueryBuilders.boolQuery() + .should(searchSourceBuilder.query()) + .should(QueryBuilders.matchPhraseQuery("fullyQualifiedName", request.getQuery())) + .should(QueryBuilders.matchPhraseQuery("name", request.getQuery())) + .should(QueryBuilders.matchPhraseQuery("displayName", request.getQuery())) + .should(QueryBuilders.matchPhraseQuery("glossary.fullyQualifiedName", request.getQuery())) + .should(QueryBuilders.matchPhraseQuery("glossary.displayName", request.getQuery())) + .must(QueryBuilders.matchQuery("status", "Approved")) + .minimumShouldMatch(1); + searchSourceBuilder.query(baseQuery); + searchSourceBuilder.sort(SortBuilders.fieldSort("fullyQualifiedName").order(SortOrder.ASC)); + } + } + } + + public List buildSearchHierarchy(SearchRequest request, es.org.elasticsearch.action.search.SearchResponse searchResponse) { + if (request + .getIndex() + .equalsIgnoreCase( + Entity.getSearchRepository() + .getIndexMapping(Entity.GLOSSARY_TERM) + .getIndexName(""))) { + return buildGlossaryTermSearchHierarchy(searchResponse); + } + return new java.util.ArrayList<>(); + } + + private static List buildGlossaryTermSearchHierarchy(es.org.elasticsearch.action.search.SearchResponse searchResponse) { + Map termMap = new LinkedHashMap<>(); + Map rootTerms = new LinkedHashMap<>(); + + for (var hit : searchResponse.getHits().getHits()) { + String jsonSource = hit.getSourceAsString(); + EntityHierarchy__1 term = JsonUtils.readValue(jsonSource, EntityHierarchy__1.class); + EntityHierarchy__1 glossaryInfo = + JsonUtils.readTree(jsonSource).path("glossary").isMissingNode() + ? null + : JsonUtils.convertValue( + JsonUtils.readTree(jsonSource).path("glossary"), EntityHierarchy__1.class); + + if (glossaryInfo != null) { + rootTerms.putIfAbsent(glossaryInfo.getFullyQualifiedName(), glossaryInfo); + } + + term.setChildren(new java.util.ArrayList<>()); + termMap.putIfAbsent(term.getFullyQualifiedName(), term); + } + + termMap.putAll(rootTerms); + + for (EntityHierarchy__1 term : termMap.values()) { + String parentFQN = org.openmetadata.service.util.FullyQualifiedName.getParentFQN(term.getFullyQualifiedName()); + String termFQN = term.getFullyQualifiedName(); + + if (parentFQN != null && termMap.containsKey(parentFQN)) { + EntityHierarchy__1 parentTerm = termMap.get(parentFQN); + List children = parentTerm.getChildren(); + children.removeIf(child -> child.getFullyQualifiedName().equals(term.getFullyQualifiedName())); + children.add(term); + parentTerm.setChildren(children); + } else { + if (rootTerms.containsKey(termFQN)) { + EntityHierarchy__1 rootTerm = rootTerms.get(termFQN); + rootTerm.setChildren(term.getChildren()); + } + } + } + + return new java.util.ArrayList<>(rootTerms.values()); + } +} \ No newline at end of file diff --git a/openmetadata-service/src/main/resources/json/data/searchSettings/searchSettings.json b/openmetadata-service/src/main/resources/json/data/searchSettings/searchSettings.json index b468cc4a6993..f6b2d795fffe 100644 --- a/openmetadata-service/src/main/resources/json/data/searchSettings/searchSettings.json +++ b/openmetadata-service/src/main/resources/json/data/searchSettings/searchSettings.json @@ -46,7 +46,32 @@ "field": "tags.tagFQN" } ], - "highlightFields": ["name", "displayName", "description", "displayName.ngram", "name.ngram"] + "highlightFields": ["name", "displayName", "description", "displayName.ngram", "name.ngram"], + "tagBoosts": [ + { "field": "tier.tagFQN", "tagFQN": "Tier.Tier1", "boost": 50.0 }, + { "field": "tier.tagFQN", "tagFQN": "Tier.Tier2", "boost": 30.0 }, + { "field": "tier.tagFQN", "tagFQN": "Tier.Tier3", "boost": 15.0 } + ], + "fieldValueBoosts": [ + { + "field": "usageSummary.weeklyStats.count", + "factor": 4.0, + "modifier": "sqrt", + "missing": 1, + "condition": { + "range": { "gt": 0 } + } + }, + { + "field": "totalVotes", + "factor": 3.0, + "modifier": "ln1p", + "missing": 0, + "condition": { + "range": { "gt": 0 } + } + } + ] }, "assetTypeConfigurations": [ { diff --git a/openmetadata-spec/src/main/resources/json/schema/configuration/searchSettings.json b/openmetadata-spec/src/main/resources/json/schema/configuration/searchSettings.json index a0d8267d4f1b..647e5d48e8cd 100644 --- a/openmetadata-spec/src/main/resources/json/schema/configuration/searchSettings.json +++ b/openmetadata-spec/src/main/resources/json/schema/configuration/searchSettings.json @@ -38,6 +38,27 @@ "type": "array", "items": { "type": "string" }, "description": "Fields to include in the highlights." + }, + "boosts": { + "type": "array", + "items": { + "$ref": "#/definitions/fieldBoost" + }, + "description": "Global field boosts that apply to all asset types." + }, + "tagBoosts": { + "type": "array", + "items": { + "$ref": "#/definitions/tagBoost" + }, + "description": "Global tag-based boosts for fields like tier tags." + }, + "fieldValueBoosts": { + "type": "array", + "items": { + "$ref": "#/definitions/fieldValueBoost" + }, + "description": "Global field value-based boosts for usage, votes, etc." } }, "additionalProperties": false @@ -149,6 +170,7 @@ "tagBoost": { "type": "object", "properties": { + "field": { "type": "string", "description": "Field name to boost." }, "tagFQN": { "type": "string", "description": "Fully Qualified Name of the tag." }, "boost": { "type": "number", "description": "Boost factor for the tag." } }, diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/configuration/searchSettings.ts b/openmetadata-ui/src/main/resources/ui/src/generated/configuration/searchSettings.ts index d856cd7c0647..fff7dca9e5f1 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/configuration/searchSettings.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/configuration/searchSettings.ts @@ -10,14 +10,267 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - - - /** - * This schema defines the Rbac Search Configuration. +/** + * This schema defines the Search Configuration, including ranking logic and other settings + * per asset type. */ export interface SearchSettings { + /** + * List of search configurations for each asset type. + */ + assetTypeConfigurations?: AssetTypeConfiguration[]; + /** + * Default search configuration when an entity doesn't match. + */ + defaultConfiguration?: AssetTypeConfiguration; /** * Flag to enable or disable the RBAC Search Configuration. */ enableAccessControl?: boolean; + /** + * Global settings for search. + */ + globalSettings?: GlobalSettings; +} + +/** + * Defines the search configuration for a specific asset type. + * + * Default search configuration when an entity doesn't match. + */ +export interface AssetTypeConfiguration { + /** + * Additional settings specific to the asset type. + */ + additionalSettings?: { [key: string]: any }; + /** + * List of aggregations to include in the search query. + */ + aggregations?: Aggregation[]; + /** + * The type of data asset this configuration applies to. + */ + assetType: string; + /** + * Determines how the combined score and the query score are combined. + */ + boostMode?: BoostMode; + /** + * Boost factors for specific fields. + */ + boosts?: FieldBoost[]; + /** + * Fields to search with their boosts. + */ + fields: { [key: string]: number }; + /** + * Boost factors based on field values with function modifiers. + */ + fieldValueBoosts?: FieldValueBoost[]; + /** + * Fields to include in the highlights. + */ + highlightFields?: string[]; + /** + * Fields that must match in the search query. + */ + mustMatch?: string[]; + /** + * Fields that must not match. + */ + mustNotMatch?: string[]; + /** + * Determines how the computed scores are combined. + */ + scoreMode?: ScoreMode; + /** + * Fields that should match in the search query. + */ + shouldMatch?: string[]; + /** + * Boost factors for specific tags. + */ + tagBoosts?: TagBoost[]; +} + +/** + * Defines an aggregation for the search query. + */ +export interface Aggregation { + /** + * The field to aggregate on. + */ + field: string; + /** + * The name of the aggregation. + */ + name: string; + /** + * The type of aggregation. + */ + type: Type; +} + +/** + * The type of aggregation. + */ +export enum Type { + Avg = "avg", + DateHistogram = "date_histogram", + Filters = "filters", + Histogram = "histogram", + Max = "max", + Min = "min", + Missing = "missing", + Nested = "nested", + Range = "range", + ReverseNested = "reverse_nested", + Stats = "stats", + Sum = "sum", + Terms = "terms", + TopHits = "top_hits", +} + +/** + * Determines how the combined score and the query score are combined. + */ +export enum BoostMode { + Avg = "avg", + Max = "max", + Min = "min", + Multiply = "multiply", + Replace = "replace", + Sum = "sum", +} + +export interface FieldBoost { + /** + * Boost factor for the field. + */ + boost: number; + /** + * Field name to boost. + */ + field: string; +} + +export interface FieldValueBoost { + /** + * Condition to apply the boost. + */ + condition?: Condition; + /** + * Factor by which to multiply the field value. + */ + factor: number; + /** + * Field name whose value is used for boosting. + */ + field: string; + /** + * Value to use if the field is missing. + */ + missing?: number; + /** + * Modifier function to apply to the field value. + */ + modifier?: Modifier; +} + +/** + * Condition to apply the boost. + */ +export interface Condition { + range?: Range; +} + +export interface Range { + /** + * Greater than value. + */ + gt?: number; + /** + * Greater than or equal to value. + */ + gte?: number; + /** + * Less than value. + */ + lt?: number; + /** + * Less than or equal to value. + */ + lte?: number; +} + +/** + * Modifier function to apply to the field value. + */ +export enum Modifier { + Ln = "ln", + Ln1P = "ln1p", + Ln2P = "ln2p", + Log = "log", + Log1P = "log1p", + Log2P = "log2p", + None = "none", + Reciprocal = "reciprocal", + Sqrt = "sqrt", + Square = "square", +} + +/** + * Determines how the computed scores are combined. + */ +export enum ScoreMode { + Avg = "avg", + First = "first", + Max = "max", + Min = "min", + Multiply = "multiply", + Sum = "sum", +} + +export interface TagBoost { + /** + * Boost factor for the tag. + */ + boost: number; + /** + * Field name to boost. + */ + field?: string; + /** + * Fully Qualified Name of the tag. + */ + tagFQN: string; +} + +/** + * Global settings for search. + */ +export interface GlobalSettings { + /** + * List of aggregations to include in the search query. + */ + aggregations?: Aggregation[]; + /** + * Global field boosts that apply to all asset types. + */ + boosts?: FieldBoost[]; + /** + * Global field value-based boosts for usage, votes, etc. + */ + fieldValueBoosts?: FieldValueBoost[]; + /** + * Fields to include in the highlights. + */ + highlightFields?: string[]; + maxAggregateSize?: number; + maxAnalyzedOffset?: number; + maxResultHits?: number; + /** + * Global tag-based boosts for fields like tier tags. + */ + tagBoosts?: TagBoost[]; }