From afce583ec8dd03c3ac5d492166399ae2b0a45880 Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Thu, 3 Aug 2023 01:08:23 +0200 Subject: [PATCH] Add ability to pass resource manager to prepare and execute --- core/index/index_writer.cpp | 12 +-- core/search/all_filter.cpp | 10 +- core/search/all_filter.hpp | 6 +- core/search/boolean_filter.cpp | 125 ++++++++++++++---------- core/search/boolean_filter.hpp | 38 +++---- core/search/column_existence_filter.cpp | 14 +-- core/search/column_existence_filter.hpp | 6 +- core/search/filter.cpp | 3 +- core/search/filter.hpp | 47 +++------ core/search/granular_range_filter.hpp | 10 +- core/search/levenshtein_filter.hpp | 10 +- core/search/nested_filter.cpp | 16 +-- core/search/nested_filter.hpp | 5 +- core/search/ngram_similarity_filter.cpp | 30 +++--- core/search/ngram_similarity_filter.hpp | 6 +- core/search/phrase_filter.cpp | 29 ++---- core/search/phrase_filter.hpp | 6 +- core/search/prefix_filter.hpp | 10 +- core/search/proxy_filter.cpp | 9 +- core/search/proxy_filter.hpp | 6 +- core/search/range_filter.hpp | 10 +- core/search/same_position_filter.cpp | 23 +++-- core/search/same_position_filter.hpp | 6 +- core/search/term_filter.hpp | 8 +- core/search/terms_filter.cpp | 53 +++++----- core/search/terms_filter.hpp | 6 +- core/search/wildcard_filter.hpp | 10 +- utils/index-search.cpp | 45 +++++++-- 28 files changed, 257 insertions(+), 302 deletions(-) diff --git a/core/index/index_writer.cpp b/core/index/index_writer.cpp index 3f0cbad43..a1d082e7c 100644 --- a/core/index/index_writer.cpp +++ b/core/index/index_writer.cpp @@ -199,13 +199,13 @@ void RemoveFromExistingSegment(DocumentMask& deleted_docs, return; } - auto prepared = query.filter->prepare(reader); + auto prepared = query.filter->prepare({.index = reader}); if (IRS_UNLIKELY(!prepared)) { return; // skip invalid prepared filters } - auto itr = prepared->execute(reader); + auto itr = prepared->execute({.segment = reader}); if (IRS_UNLIKELY(!itr)) { return; // skip invalid iterators @@ -232,12 +232,12 @@ bool RemoveFromImportedSegment(DocumentMask& deleted_docs, return false; } - auto prepared = query.filter->prepare(reader); + auto prepared = query.filter->prepare({.index = reader}); if (IRS_UNLIKELY(!prepared)) { return false; // skip invalid prepared filters } - auto itr = prepared->execute(reader); + auto itr = prepared->execute({.segment = reader}); if (IRS_UNLIKELY(!itr)) { return false; // skip invalid iterators } @@ -270,13 +270,13 @@ void FlushedSegmentContext::Remove(IndexWriter::QueryContext& query) { auto& document_mask = flushed.document_mask; - auto prepared = query.filter->prepare(*reader); + auto prepared = query.filter->prepare({.index = *reader}); if (IRS_UNLIKELY(!prepared)) { return; // Skip invalid prepared filters } - auto itr = prepared->execute(*reader); + auto itr = prepared->execute({.segment = *reader}); if (IRS_UNLIKELY(!itr)) { return; // Skip invalid iterators diff --git a/core/search/all_filter.cpp b/core/search/all_filter.cpp index 42f85e090..058c47761 100644 --- a/core/search/all_filter.cpp +++ b/core/search/all_filter.cpp @@ -47,19 +47,17 @@ class all_query : public filter::prepared { bstring stats_; }; -filter::prepared::ptr all::prepare(const IndexReader& reader, - const Scorers& order, score_t filter_boost, - const attribute_provider* /*ctx*/) const { +filter::prepared::ptr all::prepare(const PrepareContext& ctx) const { // skip field-level/term-level statistics because there are no explicit // fields/terms, but still collect index-level statistics // i.e. all fields and terms implicitly match - bstring stats(order.stats_size(), 0); + bstring stats(ctx.scorers.stats_size(), 0); auto* stats_buf = stats.data(); - PrepareCollectors(order.buckets(), stats_buf); + PrepareCollectors(ctx.scorers.buckets(), stats_buf); return memory::make_managed(std::move(stats), - this->boost() * filter_boost); + this->boost() * ctx.boost); } } // namespace irs diff --git a/core/search/all_filter.hpp b/core/search/all_filter.hpp index f5ed75baa..e39e15222 100644 --- a/core/search/all_filter.hpp +++ b/core/search/all_filter.hpp @@ -29,11 +29,7 @@ namespace irs { // Filter returning all documents class all : public filter { public: - using filter::prepare; - - filter::prepared::ptr prepare(const IndexReader& reader, const Scorers& order, - score_t filter_boost, - const attribute_provider* ctx) const final; + filter::prepared::ptr prepare(const PrepareContext& ctx) const final; irs::type_info::type_id type() const noexcept final { return irs::type::id(); diff --git a/core/search/boolean_filter.cpp b/core/search/boolean_filter.cpp index 10df79d85..5754ffa65 100644 --- a/core/search/boolean_filter.cpp +++ b/core/search/boolean_filter.cpp @@ -180,27 +180,28 @@ class BooleanQuery : public filter::prepared { } } - virtual void prepare(const IndexReader& rdr, const Scorers& ord, - score_t boost, ScoreMergeType merge_type, - const attribute_provider* ctx, + virtual void prepare(const PrepareContext& ctx, ScoreMergeType merge_type, std::span incl, std::span excl) { BooleanQuery::queries_t queries; queries.reserve(incl.size() + excl.size()); // apply boost to the current node - this->boost(boost); + this->boost(ctx.boost); // prepare included for (const auto* filter : incl) { - queries.emplace_back(filter->prepare(rdr, ord, boost, ctx)); + queries.emplace_back(filter->prepare(ctx)); } // prepare excluded for (const auto* filter : excl) { // exclusion part does not affect scoring at all - queries.emplace_back( - filter->prepare(rdr, Scorers::kUnordered, irs::kNoBoost, ctx)); + queries.emplace_back(filter->prepare({ + .index = ctx.index, + .resource_manager = ctx.resource_manager, + .ctx = ctx.ctx, + })); } // nothrow block @@ -329,22 +330,28 @@ bool boolean_filter::equals(const filter& rhs) const noexcept { }); } -filter::prepared::ptr boolean_filter::prepare( - const IndexReader& rdr, const Scorers& ord, score_t boost, - const attribute_provider* ctx) const { +filter::prepared::ptr boolean_filter::prepare(const PrepareContext& ctx) const { const auto size = filters_.size(); if (IRS_UNLIKELY(size == 0)) { return prepared::empty(); } + const PrepareContext sub_ctx{ + .index = ctx.index, + .resource_manager = ctx.resource_manager, + .scorers = ctx.scorers, + .ctx = ctx.ctx, + .boost = ctx.boost * boost(), + }; + if (size == 1) { auto* filter = filters_.front().get(); IRS_ASSERT(filter); // FIXME(gnusi): let Not handle everything? if (filter->type() != irs::type::id()) { - return filter->prepare(rdr, ord, boost * this->boost(), ctx); + return filter->prepare(sub_ctx); } } @@ -363,7 +370,7 @@ filter::prepared::ptr boolean_filter::prepare( incl.push_back(all_docs_no_boost.get()); } - return prepare(incl, excl, rdr, ord, boost, ctx); + return PrepareBoolean(incl, excl, sub_ctx); } void boolean_filter::group_filters(filter::ptr& all_docs_zero_boost, @@ -414,11 +421,9 @@ void boolean_filter::group_filters(filter::ptr& all_docs_zero_boost, } } -filter::prepared::ptr And::prepare(std::vector& incl, - std::vector& excl, - const IndexReader& rdr, const Scorers& ord, - score_t boost, - const attribute_provider* ctx) const { +filter::prepared::ptr And::PrepareBoolean(std::vector& incl, + std::vector& excl, + const PrepareContext& ctx) const { // optimization step // if include group empty itself or has 'empty' -> this whole conjunction is // empty @@ -426,9 +431,17 @@ filter::prepared::ptr And::prepare(std::vector& incl, return prepared::empty(); } + PrepareContext sub_ctx{ + .index = ctx.index, + .resource_manager = ctx.resource_manager, + .scorers = ctx.scorers, + .ctx = ctx.ctx, + .boost = ctx.boost * boost(), + }; + // single node case if (1 == incl.size() && excl.empty()) { - return incl.front()->prepare(rdr, ord, this->boost() * boost, ctx); + return incl.front()->prepare(sub_ctx); } auto cumulative_all = MakeAllDocsFilter(kNoBoost); @@ -462,12 +475,12 @@ filter::prepared::ptr And::prepare(std::vector& incl, // substitute new_boost back we will get ( boost * OR_BOOST * ALL_BOOST + // boost * OR_BOOST * LEFT_BOOST) - original non-optimized boost value auto left_boost = (*incl.begin())->boost(); - if (this->boost() != 0 && left_boost != 0 && !ord.empty()) { - boost = (boost * this->boost() * all_boost + - boost * this->boost() * left_boost) / - (left_boost * this->boost()); + if (boost() != 0 && left_boost != 0 && !ctx.scorers.empty()) { + sub_ctx.boost = (sub_ctx.boost * boost() * all_boost + + sub_ctx.boost * boost() * left_boost) / + (left_boost * boost()); } else { - boost = 0; + sub_ctx.boost = 0; } } else { // create new 'all' with boost from all removed @@ -475,38 +488,47 @@ filter::prepared::ptr And::prepare(std::vector& incl, incl.push_back(cumulative_all.get()); } } - boost *= this->boost(); + sub_ctx.boost *= this->boost(); if (1 == incl.size() && excl.empty()) { // single node case - return incl.front()->prepare(rdr, ord, boost, ctx); + return incl.front()->prepare(sub_ctx); } auto q = memory::make_managed(); - q->prepare(rdr, ord, boost, merge_type(), ctx, incl, excl); + q->prepare(sub_ctx, merge_type(), incl, excl); return q; } -filter::prepared::ptr Or::prepare(const IndexReader& rdr, const Scorers& ord, - score_t boost, - const attribute_provider* ctx) const { +filter::prepared::ptr Or::prepare(const PrepareContext& ctx) const { + const PrepareContext sub_ctx{ + .index = ctx.index, + .resource_manager = ctx.resource_manager, + .scorers = ctx.scorers, + .ctx = ctx.ctx, + .boost = ctx.boost * boost(), + }; + if (0 == min_match_count_) { // only explicit 0 min match counts! // all conditions are satisfied - return MakeAllDocsFilter(kNoBoost)->prepare(rdr, ord, this->boost() * boost, - ctx); + return MakeAllDocsFilter(kNoBoost)->prepare(sub_ctx); } - return boolean_filter::prepare(rdr, ord, boost, ctx); + return boolean_filter::prepare(sub_ctx); } -filter::prepared::ptr Or::prepare(std::vector& incl, - std::vector& excl, - const IndexReader& rdr, const Scorers& ord, - score_t boost, - const attribute_provider* ctx) const { - boost *= this->boost(); +filter::prepared::ptr Or::PrepareBoolean(std::vector& incl, + std::vector& excl, + const PrepareContext& ctx) const { + const PrepareContext sub_ctx{ + .index = ctx.index, + .resource_manager = ctx.resource_manager, + .scorers = ctx.scorers, + .ctx = ctx.ctx, + .boost = ctx.boost * boost(), + }; if (0 == min_match_count_) { // only explicit 0 min match counts! // all conditions are satisfied - return MakeAllDocsFilter(kNoBoost)->prepare(rdr, ord, boost, ctx); + return MakeAllDocsFilter(kNoBoost)->prepare(sub_ctx); } if (!incl.empty() && incl.back()->type() == irs::type::id()) { @@ -519,7 +541,7 @@ filter::prepared::ptr Or::prepare(std::vector& incl, // single node case if (1 == incl.size() && excl.empty()) { - return incl.front()->prepare(rdr, ord, boost, ctx); + return incl.front()->prepare(sub_ctx); } auto cumulative_all = MakeAllDocsFilter(kNoBoost); @@ -537,7 +559,8 @@ filter::prepared::ptr Or::prepare(std::vector& incl, } } if (all_count != 0) { - if (ord.empty() && incl.size() > 1 && min_match_count_ <= all_count) { + if (ctx.scorers.empty() && incl.size() > 1 && + min_match_count_ <= all_count) { // if we have at least one all in include group - all other filters are // not necessary in case there is no scoring and 'all' count satisfies // min_match @@ -574,7 +597,7 @@ filter::prepared::ptr Or::prepare(std::vector& incl, if (1 == incl.size() && excl.empty()) { // single node case - return incl.front()->prepare(rdr, ord, boost, ctx); + return incl.front()->prepare(sub_ctx); } IRS_ASSERT(adjusted_min_match_count > 0 && @@ -589,20 +612,24 @@ filter::prepared::ptr Or::prepare(std::vector& incl, q = memory::make_managed(adjusted_min_match_count); } - q->prepare(rdr, ord, boost, merge_type(), ctx, incl, excl); + q->prepare(sub_ctx, merge_type(), incl, excl); return q; } -filter::prepared::ptr Not::prepare(const IndexReader& rdr, const Scorers& ord, - score_t boost, - const attribute_provider* ctx) const { +filter::prepared::ptr Not::prepare(const PrepareContext& ctx) const { const auto res = optimize_not(*this); if (!res.first) { return prepared::empty(); } - boost *= this->boost(); + const PrepareContext sub_ctx{ + .index = ctx.index, + .resource_manager = ctx.resource_manager, + .scorers = ctx.scorers, + .ctx = ctx.ctx, + .boost = ctx.boost * boost(), + }; if (res.second) { auto all_docs = MakeAllDocsFilter(kNoBoost); @@ -610,12 +637,12 @@ filter::prepared::ptr Not::prepare(const IndexReader& rdr, const Scorers& ord, const std::array excl{res.first}; auto q = memory::make_managed(); - q->prepare(rdr, ord, boost, ScoreMergeType::kSum, ctx, incl, excl); + q->prepare(sub_ctx, ScoreMergeType::kSum, incl, excl); return q; } // negation has been optimized out - return res.first->prepare(rdr, ord, boost, ctx); + return res.first->prepare(sub_ctx); } size_t Not::hash() const noexcept { diff --git a/core/search/boolean_filter.hpp b/core/search/boolean_filter.hpp index cdf15ed3f..349bfd14c 100644 --- a/core/search/boolean_filter.hpp +++ b/core/search/boolean_filter.hpp @@ -61,17 +61,14 @@ class boolean_filter : public filter, public AllDocsProvider { bool empty() const { return filters_.empty(); } size_t size() const { return filters_.size(); } - filter::prepared::ptr prepare(const IndexReader& rdr, const Scorers& ord, - score_t boost, - const attribute_provider* ctx) const override; + filter::prepared::ptr prepare(const PrepareContext& ctx) const override; protected: bool equals(const filter& rhs) const noexcept final; - virtual filter::prepared::ptr prepare( + virtual filter::prepared::ptr PrepareBoolean( std::vector& incl, std::vector& excl, - const IndexReader& rdr, const Scorers& ord, score_t boost, - const attribute_provider* ctx) const = 0; + const PrepareContext& ctx) const = 0; private: void group_filters(filter::ptr& all_docs_no_boost, @@ -85,18 +82,14 @@ class boolean_filter : public filter, public AllDocsProvider { // Represents conjunction class And final : public boolean_filter { public: - using filter::prepare; - type_info::type_id type() const noexcept final { return irs::type::id(); } protected: - filter::prepared::ptr prepare(std::vector& incl, - std::vector& excl, - const IndexReader& rdr, const Scorers& ord, - score_t boost, - const attribute_provider* ctx) const final; + filter::prepared::ptr PrepareBoolean(std::vector& incl, + std::vector& excl, + const PrepareContext& ctx) const final; }; // Represents disjunction @@ -111,19 +104,14 @@ class Or final : public boolean_filter { return *this; } - using filter::prepare; - filter::prepared::ptr prepare(const IndexReader& rdr, const Scorers& ord, - score_t boost, - const attribute_provider* ctx) const final; + filter::prepared::ptr prepare(const PrepareContext& ctx) const final; type_info::type_id type() const noexcept final { return irs::type::id(); } protected: - filter::prepared::ptr prepare(std::vector& incl, - std::vector& excl, - const IndexReader& rdr, const Scorers& ord, - score_t boost, - const attribute_provider* ctx) const final; + filter::prepared::ptr PrepareBoolean(std::vector& incl, + std::vector& excl, + const PrepareContext& ctx) const final; private: size_t min_match_count_{1}; @@ -154,11 +142,7 @@ class Not : public filter, public AllDocsProvider { void clear() { filter_.reset(); } bool empty() const { return nullptr == filter_; } - using filter::prepare; - - filter::prepared::ptr prepare(const IndexReader& rdr, const Scorers& ord, - score_t boost, - const attribute_provider* ctx) const final; + filter::prepared::ptr prepare(const PrepareContext& ctx) const final; size_t hash() const noexcept final; diff --git a/core/search/column_existence_filter.cpp b/core/search/column_existence_filter.cpp index c9f89e116..3dc19c770 100644 --- a/core/search/column_existence_filter.cpp +++ b/core/search/column_existence_filter.cpp @@ -25,10 +25,9 @@ #include "formats/empty_term_reader.hpp" #include "search/disjunction.hpp" +namespace irs { namespace { -using namespace irs; - class column_existence_query : public irs::filter::prepared { public: column_existence_query(std::string_view field, bstring&& stats, score_t boost) @@ -129,20 +128,17 @@ class column_prefix_existence_query : public column_existence_query { } // namespace -namespace irs { - filter::prepared::ptr by_column_existence::prepare( - const IndexReader& reader, const Scorers& order, score_t filter_boost, - const attribute_provider* /*ctx*/) const { + const PrepareContext& ctx) const { // skip field-level/term-level statistics because there are no explicit // fields/terms, but still collect index-level statistics // i.e. all fields and terms implicitly match - bstring stats(order.stats_size(), 0); + bstring stats(ctx.scorers.stats_size(), 0); auto* stats_buf = stats.data(); - PrepareCollectors(order.buckets(), stats_buf); + PrepareCollectors(ctx.scorers.buckets(), stats_buf); - filter_boost *= boost(); + const auto filter_boost = ctx.boost * boost(); auto& acceptor = options().acceptor; diff --git a/core/search/column_existence_filter.hpp b/core/search/column_existence_filter.hpp index 5f289d141..d2882ec36 100644 --- a/core/search/column_existence_filter.hpp +++ b/core/search/column_existence_filter.hpp @@ -49,11 +49,7 @@ struct by_column_existence_options { class by_column_existence final : public filter_base { public: - using filter::prepare; - - filter::prepared::ptr prepare(const IndexReader& rdr, const Scorers& ord, - score_t boost, - const attribute_provider* ctx) const final; + filter::prepared::ptr prepare(const PrepareContext& ctx) const final; }; } // namespace irs diff --git a/core/search/filter.cpp b/core/search/filter.cpp index e7db6777e..5803458c2 100644 --- a/core/search/filter.cpp +++ b/core/search/filter.cpp @@ -47,8 +47,7 @@ filter::prepared::ptr filter::prepared::empty() { return memory::to_managed(kEmptyQuery); } -filter::prepared::ptr empty::prepare(const IndexReader&, const Scorers&, - score_t, const attribute_provider*) const { +filter::prepared::ptr empty::prepare(const PrepareContext& /*ctx*/) const { return memory::to_managed(kEmptyQuery); } diff --git a/core/search/filter.hpp b/core/search/filter.hpp index 00557e146..2b193d83f 100644 --- a/core/search/filter.hpp +++ b/core/search/filter.hpp @@ -35,10 +35,19 @@ namespace irs { struct IndexReader; struct PreparedStateVisitor; +struct PrepareContext { + const IndexReader& index; + IResourceManager& resource_manager = IResourceManager::kNoop; + const Scorers& scorers = Scorers::kUnordered; + const attribute_provider* ctx = nullptr; + score_t boost = kNoBoost; +}; + struct ExecutionContext { const SubReader& segment; - const Scorers& scorers; - const attribute_provider* ctx{}; + IResourceManager& resource_manager = IResourceManager::kNoop; + const Scorers& scorers = Scorers::kUnordered; + const attribute_provider* ctx = nullptr; WandContext wand; }; @@ -54,12 +63,6 @@ class filter { explicit prepared(score_t boost = kNoBoost) noexcept : boost_(boost) {} - doc_iterator::ptr execute( - const SubReader& segment, - const Scorers& scorers = Scorers::kUnordered) const { - return execute({.segment = segment, .scorers = scorers}); - } - virtual doc_iterator::ptr execute(const ExecutionContext& ctx) const = 0; virtual void visit(const SubReader& segment, PreparedStateVisitor& visitor, @@ -84,29 +87,7 @@ class filter { bool operator==(const filter& rhs) const noexcept { return equals(rhs); } - // boost - external boost - virtual filter::prepared::ptr prepare( - const IndexReader& rdr, const Scorers& ord, score_t boost, - const attribute_provider* ctx) const = 0; - - filter::prepared::ptr prepare(const IndexReader& rdr, const Scorers& ord, - const attribute_provider* ctx) const { - return prepare(rdr, ord, irs::kNoBoost, ctx); - } - - filter::prepared::ptr prepare(const IndexReader& rdr, const Scorers& ord, - score_t boost) const { - return prepare(rdr, ord, boost, nullptr); - } - - filter::prepared::ptr prepare(const IndexReader& rdr, - const Scorers& ord) const { - return prepare(rdr, ord, irs::kNoBoost); - } - - filter::prepared::ptr prepare(const IndexReader& rdr) const { - return prepare(rdr, Scorers::kUnordered); - } + virtual filter::prepared::ptr prepare(const PrepareContext& ctx) const = 0; score_t boost() const noexcept { return boost_; } @@ -185,9 +166,7 @@ class filter_base : public filter_with_options { // Filter which returns no documents class empty final : public filter { public: - filter::prepared::ptr prepare(const IndexReader& rdr, const Scorers& ord, - score_t boost, - const attribute_provider* ctx) const final; + filter::prepared::ptr prepare(const PrepareContext& ctx) const final; type_info::type_id type() const noexcept final { return irs::type::id(); diff --git a/core/search/granular_range_filter.hpp b/core/search/granular_range_filter.hpp index c6cfef17e..2a61d98c6 100644 --- a/core/search/granular_range_filter.hpp +++ b/core/search/granular_range_filter.hpp @@ -107,13 +107,9 @@ class by_granular_range : public filter_base { const options_type::range_type& rng, filter_visitor& visitor); - using filter::prepare; - - filter::prepared::ptr prepare(const IndexReader& index, const Scorers& ord, - score_t boost, - const attribute_provider* /*ctx*/) const final { - return prepare(index, ord, this->boost() * boost, field(), options().range, - options().scored_terms_limit); + filter::prepared::ptr prepare(const PrepareContext& ctx) const final { + return prepare(ctx.index, ctx.scorers, ctx.boost * boost(), field(), + options().range, options().scored_terms_limit); } }; diff --git a/core/search/levenshtein_filter.hpp b/core/search/levenshtein_filter.hpp index 97da4bb49..1a9746534 100644 --- a/core/search/levenshtein_filter.hpp +++ b/core/search/levenshtein_filter.hpp @@ -119,13 +119,9 @@ class by_edit_distance final : public filter_base { static field_visitor visitor(const options_type::filter_options& options); - using filter::prepare; - - filter::prepared::ptr prepare(const IndexReader& index, const Scorers& order, - score_t boost, - const attribute_provider* /*ctx*/) const final { - return prepare(index, order, this->boost() * boost, field(), options().term, - options().max_terms, options().max_distance, + filter::prepared::ptr prepare(const PrepareContext& ctx) const final { + return prepare(ctx.index, ctx.scorers, ctx.boost * boost(), field(), + options().term, options().max_terms, options().max_distance, options().provider, options().with_transpositions, options().prefix); } diff --git a/core/search/nested_filter.cpp b/core/search/nested_filter.cpp index 980e41bc0..2b06c5e98 100644 --- a/core/search/nested_filter.cpp +++ b/core/search/nested_filter.cpp @@ -687,18 +687,22 @@ doc_iterator::ptr ByNestedQuery::execute(const ExecutionContext& ctx) const { }); } -filter::prepared::ptr ByNestedFilter::prepare( - const IndexReader& rdr, const Scorers& ord, score_t boost, - const attribute_provider* ctx) const { +filter::prepared::ptr ByNestedFilter::prepare(const PrepareContext& ctx) const { auto& [parent, child, match, merge_type] = options(); if (!parent || !child || !IsValid(match)) { return prepared::empty(); } - boost *= this->boost(); + const auto sub_boost = ctx.boost * boost(); - auto prepared_child = child->prepare(rdr, GetOrder(match, ord), boost, ctx); + auto prepared_child = child->prepare({ + .index = ctx.index, + .resource_manager = ctx.resource_manager, + .scorers = GetOrder(match, ctx.scorers), + .ctx = ctx.ctx, + .boost = sub_boost, + }); if (!prepared_child) { return prepared::empty(); @@ -706,7 +710,7 @@ filter::prepared::ptr ByNestedFilter::prepare( return memory::make_managed(parent, std::move(prepared_child), merge_type, match, - /*none_boost*/ boost); + /*none_boost*/ sub_boost); } } // namespace irs diff --git a/core/search/nested_filter.hpp b/core/search/nested_filter.hpp index d4627b0c4..a793b5366 100644 --- a/core/search/nested_filter.hpp +++ b/core/search/nested_filter.hpp @@ -109,10 +109,7 @@ struct ByNestedOptions { // Filter is capable of finding parents by the corresponding child filter. class ByNestedFilter final : public filter_with_options { public: - using filter::prepare; - - prepared::ptr prepare(const IndexReader& rdr, const Scorers& ord, score_t boost, - const attribute_provider* /*ctx*/) const final; + prepared::ptr prepare(const PrepareContext& ctx) const final; }; } // namespace irs diff --git a/core/search/ngram_similarity_filter.cpp b/core/search/ngram_similarity_filter.cpp index 6eb806a02..76d2a2832 100644 --- a/core/search/ngram_similarity_filter.cpp +++ b/core/search/ngram_similarity_filter.cpp @@ -32,8 +32,7 @@ namespace irs { filter::prepared::ptr by_ngram_similarity::prepare( - const IndexReader& rdr, const Scorers& ord, score_t boost, - const attribute_provider* ctx) const { + const PrepareContext& ctx) const { const auto threshold = std::max(0.f, std::min(1.f, options().threshold)); const auto& ngrams = options().ngrams; @@ -47,18 +46,24 @@ filter::prepared::ptr by_ngram_similarity::prepare( std::ceil(static_cast(ngrams.size()) * threshold)), size_t{1}); - if (ord.empty() && 1 == min_match_count) { + const auto sub_boost = ctx.boost * boost(); + + if (ctx.scorers.empty() && 1 == min_match_count) { irs::by_terms disj; for (auto& terms = disj.mutable_options()->terms; auto& term : options().ngrams) { terms.emplace(term, irs::kNoBoost); } *disj.mutable_field() = this->field(); - disj.boost(this->boost()); - return disj.prepare(rdr, irs::Scorers::kUnordered, boost, ctx); + return disj.prepare({ + .index = ctx.index, + .resource_manager = ctx.resource_manager, + .ctx = ctx.ctx, + .boost = sub_boost, + }); } - NGramStates query_states{rdr.size()}; + NGramStates query_states{ctx.index.size()}; // per segment terms states const auto terms_count = ngrams.size(); @@ -66,12 +71,12 @@ filter::prepared::ptr by_ngram_similarity::prepare( term_states.reserve(terms_count); // prepare ngrams stats - field_collectors field_stats{ord}; - term_collectors term_stats{ord, terms_count}; + field_collectors field_stats{ctx.scorers}; + term_collectors term_stats{ctx.scorers, terms_count}; const std::string_view field_name = this->field(); - for (const auto& segment : rdr) { + for (const auto& segment : ctx.index) { // get term dictionary for field const term_reader* field = segment.field(field_name); @@ -119,16 +124,15 @@ filter::prepared::ptr by_ngram_similarity::prepare( term_states.reserve(terms_count); } - bstring stats(ord.stats_size(), 0); + bstring stats(ctx.scorers.stats_size(), 0); auto* stats_buf = stats.data(); for (size_t term_idx = 0; term_idx < terms_count; ++term_idx) { - term_stats.finish(stats_buf, term_idx, field_stats, rdr); + term_stats.finish(stats_buf, term_idx, field_stats, ctx.index); } return memory::make_managed( - min_match_count, std::move(query_states), std::move(stats), - this->boost() * boost); + min_match_count, std::move(query_states), std::move(stats), sub_boost); } } // namespace irs diff --git a/core/search/ngram_similarity_filter.hpp b/core/search/ngram_similarity_filter.hpp index dd6a6e371..973d788f8 100644 --- a/core/search/ngram_similarity_filter.hpp +++ b/core/search/ngram_similarity_filter.hpp @@ -51,11 +51,7 @@ struct by_ngram_similarity_options { class by_ngram_similarity : public filter_base { public: - using filter::prepare; - - filter::prepared::ptr prepare(const IndexReader& rdr, const Scorers& ord, - score_t boost, - const attribute_provider* ctx) const final; + filter::prepared::ptr prepare(const PrepareContext& ctx) const final; }; } // namespace irs diff --git a/core/search/phrase_filter.cpp b/core/search/phrase_filter.cpp index 09108ebe4..8fa9ee555 100644 --- a/core/search/phrase_filter.cpp +++ b/core/search/phrase_filter.cpp @@ -31,11 +31,10 @@ #include "search/states/phrase_state.hpp" #include "search/states_cache.hpp" +namespace irs { namespace { -using namespace irs; - -struct get_visitor { +struct GetVisitor { using result_type = field_visitor; result_type operator()(const by_term_options& part) const { @@ -85,7 +84,7 @@ struct get_visitor { } }; -struct prepare : util::noncopyable { +struct Prepare : util::noncopyable { using result_type = filter::prepared::ptr; result_type operator()(const by_term_options& opts) const { @@ -124,8 +123,8 @@ struct prepare : util::noncopyable { return filter::prepared::empty(); } - prepare(const IndexReader& index, const Scorers& order, std::string_view field, - const score_t boost) noexcept + Prepare(const IndexReader& index, const Scorers& order, + std::string_view field, const score_t boost) noexcept : index(index), order(order), field(field), boost(boost) {} const IndexReader& index; @@ -136,8 +135,6 @@ struct prepare : util::noncopyable { } // namespace -namespace irs { - // Filter visitor for phrase queries template class phrase_term_visitor final : public filter_visitor, @@ -198,13 +195,7 @@ class phrase_term_visitor final : public filter_visitor, bool volatile_boost_ = false; }; -} // namespace irs - -namespace irs { - -filter::prepared::ptr by_phrase::prepare( - const IndexReader& index, const Scorers& ord, score_t boost, - const attribute_provider* /*ctx*/) const { +filter::prepared::ptr by_phrase::prepare(const PrepareContext& ctx) const { if (field().empty() || options().empty()) { // empty field or phrase return filter::prepared::empty(); @@ -212,7 +203,7 @@ filter::prepared::ptr by_phrase::prepare( if (1 == options().size()) { auto query = - std::visit(::prepare{index, ord, field(), this->boost() * boost}, + std::visit(Prepare{ctx.index, ctx.scorers, field(), ctx.boost * boost()}, options().begin()->second); if (query) { @@ -222,10 +213,10 @@ filter::prepared::ptr by_phrase::prepare( // prepare phrase stats (collector for each term) if (options().simple()) { - return fixed_prepare_collect(index, ord, boost); + return fixed_prepare_collect(ctx.index, ctx.scorers, ctx.boost); } - return variadic_prepare_collect(index, ord, boost); + return variadic_prepare_collect(ctx.index, ctx.scorers, ctx.boost); } filter::prepared::ptr by_phrase::fixed_prepare_collect(const IndexReader& index, @@ -331,7 +322,7 @@ filter::prepared::ptr by_phrase::variadic_prepare_collect( phrase_part_stats.reserve(phrase_size); for (const auto& word : options()) { phrase_part_stats.emplace_back(ord, 0); - phrase_part_visitors.emplace_back(std::visit(get_visitor{}, word.second)); + phrase_part_visitors.emplace_back(std::visit(GetVisitor{}, word.second)); } // per segment phrase states diff --git a/core/search/phrase_filter.hpp b/core/search/phrase_filter.hpp index 402f4b116..27a6d23f1 100644 --- a/core/search/phrase_filter.hpp +++ b/core/search/phrase_filter.hpp @@ -146,11 +146,7 @@ class by_phrase_options { // Phrase filter class by_phrase : public filter_base { public: - using filter::prepare; - - filter::prepared::ptr prepare(const IndexReader& index, const Scorers& ord, - score_t boost, - const attribute_provider* ctx) const final; + filter::prepared::ptr prepare(const PrepareContext& ctx) const final; private: filter::prepared::ptr fixed_prepare_collect(const IndexReader& index, diff --git a/core/search/prefix_filter.hpp b/core/search/prefix_filter.hpp index 66ee0b3f4..919d7d93c 100644 --- a/core/search/prefix_filter.hpp +++ b/core/search/prefix_filter.hpp @@ -79,13 +79,9 @@ class by_prefix : public filter_base { static void visit(const SubReader& segment, const term_reader& reader, bytes_view prefix, filter_visitor& visitor); - using filter::prepare; - - filter::prepared::ptr prepare(const IndexReader& index, const Scorers& ord, - score_t boost, - const attribute_provider* /*ctx*/) const final { - return prepare(index, ord, this->boost() * boost, field(), options().term, - options().scored_terms_limit); + filter::prepared::ptr prepare(const PrepareContext& ctx) const final { + return prepare(ctx.index, ctx.scorers, ctx.boost * boost(), field(), + options().term, options().scored_terms_limit); } }; diff --git a/core/search/proxy_filter.cpp b/core/search/proxy_filter.cpp index 2075637d4..88e42c6ea 100644 --- a/core/search/proxy_filter.cpp +++ b/core/search/proxy_filter.cpp @@ -201,22 +201,19 @@ class proxy_query : public filter::prepared { mutable proxy_filter::cache_ptr cache_; }; -filter::prepared::ptr proxy_filter::prepare( - const IndexReader& rdr, const Scorers& ord, score_t boost, - const attribute_provider* ctx) const { +filter::prepared::ptr proxy_filter::prepare(const PrepareContext& ctx) const { if (!cache_ || !cache_->real_filter_) { IRS_ASSERT(false); return filter::prepared::empty(); } - if (!ord.empty()) { + if (!ctx.scorers.empty()) { // Currently we do not support caching scores. // Proxy filter should not be used with scorers! IRS_ASSERT(false); return filter::prepared::empty(); } if (!cache_->prepared_real_filter_) { - cache_->prepared_real_filter_ = - cache_->real_filter_->prepare(rdr, ord, boost, ctx); + cache_->prepared_real_filter_ = cache_->real_filter_->prepare(ctx); } return memory::make_managed(cache_); } diff --git a/core/search/proxy_filter.hpp b/core/search/proxy_filter.hpp index f2f475f2f..e63532632 100644 --- a/core/search/proxy_filter.hpp +++ b/core/search/proxy_filter.hpp @@ -42,11 +42,7 @@ class proxy_filter final : public filter { public: using cache_ptr = std::shared_ptr; - using filter::prepare; - - filter::prepared::ptr prepare(const IndexReader& rdr, const Scorers&, - score_t boost, - const attribute_provider*) const final; + filter::prepared::ptr prepare(const PrepareContext& ctx) const final; template std::pair set_filter(Args&&... args) { diff --git a/core/search/range_filter.hpp b/core/search/range_filter.hpp index 526b6257c..d0b713bc9 100644 --- a/core/search/range_filter.hpp +++ b/core/search/range_filter.hpp @@ -86,13 +86,9 @@ class by_range : public filter_base { const options_type::range_type& rng, filter_visitor& visitor); - using filter::prepare; - - filter::prepared::ptr prepare(const IndexReader& index, const Scorers& ord, - score_t boost, - const attribute_provider* /*ctx*/) const final { - return prepare(index, ord, this->boost() * boost, field(), options().range, - options().scored_terms_limit); + filter::prepared::ptr prepare(const PrepareContext& ctx) const final { + return prepare(ctx.index, ctx.scorers, ctx.boost * boost(), field(), + options().range, options().scored_terms_limit); } }; diff --git a/core/search/same_position_filter.cpp b/core/search/same_position_filter.cpp index 3e4eb2953..1b47e8eeb 100644 --- a/core/search/same_position_filter.cpp +++ b/core/search/same_position_filter.cpp @@ -193,8 +193,7 @@ class same_position_query : public filter::prepared { } // namespace filter::prepared::ptr by_same_position::prepare( - const IndexReader& index, const Scorers& ord, score_t boost, - const attribute_provider* /*ctx*/) const { + const PrepareContext& ctx) const { auto& terms = options().terms; const auto size = terms.size(); @@ -204,7 +203,7 @@ filter::prepared::ptr by_same_position::prepare( } // per segment query state - same_position_query::states_t query_states{index.size()}; + same_position_query::states_t query_states{ctx.index.size()}; // per segment terms states same_position_query::states_t::state_type term_states; @@ -213,12 +212,12 @@ filter::prepared::ptr by_same_position::prepare( // !!! FIXME !!! // that's completely wrong, we have to collect stats for each field // instead of aggregating them using a single collector - field_collectors field_stats(ord); + field_collectors field_stats(ctx.scorers); // prepare phrase stats (collector for each term) - term_collectors term_stats(ord, size); + term_collectors term_stats(ctx.scorers, size); - for (const auto& segment : index) { + for (const auto& segment : ctx.index) { size_t term_idx = 0; for (const auto& branch : terms) { @@ -236,14 +235,14 @@ filter::prepared::ptr by_same_position::prepare( continue; } - field_stats.collect(segment, - *field); // collect field statistics once per segment + // collect field statistics once per segment + field_stats.collect(segment, *field); // find terms seek_term_iterator::ptr term = field->iterator(SeekMode::NORMAL); if (!term->seek(branch.second)) { - if (ord.empty()) { + if (ctx.scorers.empty()) { break; } else { // continue here because we should collect @@ -278,13 +277,13 @@ filter::prepared::ptr by_same_position::prepare( size_t term_idx = 0; same_position_query::stats_t stats(size); for (auto& stat : stats) { - stat.resize(ord.stats_size()); + stat.resize(ctx.scorers.stats_size()); auto* stats_buf = stat.data(); - term_stats.finish(stats_buf, term_idx++, field_stats, index); + term_stats.finish(stats_buf, term_idx++, field_stats, ctx.index); } return memory::make_managed( - std::move(query_states), std::move(stats), this->boost() * boost); + std::move(query_states), std::move(stats), ctx.boost * boost()); } } // namespace irs diff --git a/core/search/same_position_filter.hpp b/core/search/same_position_filter.hpp index 01a211547..e1cb64fd7 100644 --- a/core/search/same_position_filter.hpp +++ b/core/search/same_position_filter.hpp @@ -58,11 +58,7 @@ class by_same_position : public filter_with_options { static constexpr IndexFeatures kRequiredFeatures = IndexFeatures::FREQ | IndexFeatures::POS; - using filter::prepare; - - filter::prepared::ptr prepare(const IndexReader& rdr, const Scorers& ord, - score_t boost, - const attribute_provider* ctx) const final; + filter::prepared::ptr prepare(const PrepareContext& ctx) const final; }; } // namespace irs diff --git a/core/search/term_filter.hpp b/core/search/term_filter.hpp index 6b325c734..8db25fe53 100644 --- a/core/search/term_filter.hpp +++ b/core/search/term_filter.hpp @@ -53,11 +53,9 @@ class by_term : public filter_base { static void visit(const SubReader& segment, const term_reader& field, bytes_view term, filter_visitor& visitor); - using filter::prepare; - - prepared::ptr prepare(const IndexReader& rdr, const Scorers& ord, score_t boost, - const attribute_provider* /*ctx*/) const final { - return prepare(rdr, ord, boost * this->boost(), field(), options().term); + prepared::ptr prepare(const PrepareContext& ctx) const final { + return prepare(ctx.index, ctx.scorers, ctx.boost * boost(), field(), + options().term); } }; diff --git a/core/search/terms_filter.cpp b/core/search/terms_filter.cpp index 20aff2a7b..539f2a4f6 100644 --- a/core/search/terms_filter.cpp +++ b/core/search/terms_filter.cpp @@ -31,14 +31,13 @@ #include "search/multiterm_query.hpp" #include "search/term_filter.hpp" +namespace irs { namespace { -using namespace irs; - template -void visit(const SubReader& segment, const term_reader& field, - const by_terms_options::search_terms& search_terms, - Visitor& visitor) { +void VisitImpl(const SubReader& segment, const term_reader& field, + const by_terms_options::search_terms& search_terms, + Visitor& visitor) { auto terms = field.iterator(SeekMode::NORMAL); if (IRS_UNLIKELY(!terms)) { @@ -93,26 +92,21 @@ void collect_terms(const IndexReader& index, std::string_view field, continue; } - visit(segment, *reader, terms, visitor); + VisitImpl(segment, *reader, terms, visitor); } } } // namespace -namespace irs { - void by_terms::visit(const SubReader& segment, const term_reader& field, const by_terms_options::search_terms& terms, filter_visitor& visitor) { - ::visit(segment, field, terms, visitor); + VisitImpl(segment, field, terms, visitor); } -filter::prepared::ptr by_terms::prepare(const IndexReader& index, - const Scorers& order, score_t boost, - const attribute_provider* ctx) const { +filter::prepared::ptr by_terms::prepare(const PrepareContext& ctx) const { const auto& [terms, min_match, merge_type] = options(); const size_t size = terms.size(); - boost *= this->boost(); if (0 == size || min_match > size) { // Empty or unreachable search criteria @@ -120,29 +114,38 @@ filter::prepared::ptr by_terms::prepare(const IndexReader& index, } if (0 == min_match) { - if (order.empty()) { - return MakeAllDocsFilter(kNoBoost)->prepare(index); + if (ctx.scorers.empty()) { + return MakeAllDocsFilter(kNoBoost)->prepare({ + .index = ctx.index, + }); } else { Or disj; // Don't contribute to the score disj.add(MakeAllDocsFilter(0.)); // Reset min_match to 1 disj.add(*this).mutable_options()->min_match = 1; - return disj.prepare(index, order, kNoBoost, ctx); + return disj.prepare({ + .index = ctx.index, + .resource_manager = ctx.resource_manager, + .scorers = ctx.scorers, + .ctx = ctx.ctx, + }); } } + const auto sub_boost = ctx.boost * boost(); + if (1 == size) { const auto term = std::begin(terms); - return by_term::prepare(index, order, boost * term->boost, field(), - term->term); + return by_term::prepare(ctx.index, ctx.scorers, sub_boost * term->boost, + field(), term->term); } - field_collectors field_stats{order}; - term_collectors term_stats{order, size}; - MultiTermQuery::States states{index.size()}; + field_collectors field_stats{ctx.scorers}; + term_collectors term_stats{ctx.scorers, size}; + MultiTermQuery::States states{ctx.index.size()}; all_terms_collector collector{states, field_stats, term_stats}; - collect_terms(index, field(), terms, collector); + collect_terms(ctx.index, field(), terms, collector); // FIXME(gnusi): Filter out unmatched states during collection if (min_match > 1) { @@ -157,13 +160,13 @@ filter::prepared::ptr by_terms::prepare(const IndexReader& index, MultiTermQuery::Stats stats{size}; for (size_t term_idx = 0; auto& stat : stats) { - stat.resize(order.stats_size(), 0); + stat.resize(ctx.scorers.stats_size(), 0); auto* stats_buf = stat.data(); - term_stats.finish(stats_buf, term_idx++, field_stats, index); + term_stats.finish(stats_buf, term_idx++, field_stats, ctx.index); } return memory::make_managed( - std::move(states), std::move(stats), boost, merge_type, min_match); + std::move(states), std::move(stats), sub_boost, merge_type, min_match); } } // namespace irs diff --git a/core/search/terms_filter.hpp b/core/search/terms_filter.hpp index f750ac09a..186d1f72b 100644 --- a/core/search/terms_filter.hpp +++ b/core/search/terms_filter.hpp @@ -90,11 +90,7 @@ class by_terms final : public filter_base, const by_terms_options::search_terms& terms, filter_visitor& visitor); - using filter::prepare; - - filter::prepared::ptr prepare(const IndexReader& index, const Scorers& order, - score_t boost, - const attribute_provider* /*ctx*/) const final; + filter::prepared::ptr prepare(const PrepareContext& ctx) const final; }; } // namespace irs diff --git a/core/search/wildcard_filter.hpp b/core/search/wildcard_filter.hpp index 679d2fa35..3e78d19b2 100644 --- a/core/search/wildcard_filter.hpp +++ b/core/search/wildcard_filter.hpp @@ -69,13 +69,9 @@ class by_wildcard final : public filter_base { static field_visitor visitor(bytes_view term); - using filter::prepare; - - filter::prepared::ptr prepare(const IndexReader& index, const Scorers& order, - score_t boost, - const attribute_provider* /*ctx*/) const final { - return prepare(index, order, this->boost() * boost, field(), options().term, - options().scored_terms_limit); + filter::prepared::ptr prepare(const PrepareContext& ctx) const final { + return prepare(ctx.index, ctx.scorers, this->boost() * ctx.boost, field(), + options().term, options().scored_terms_limit); } }; diff --git a/utils/index-search.cpp b/utils/index-search.cpp index fcc87ce84..2791d918c 100644 --- a/utils/index-search.cpp +++ b/utils/index-search.cpp @@ -257,7 +257,10 @@ irs::filter::prepared::ptr prepareFilter( *query.mutable_field() = "body"; query.mutable_options()->term = irs::ViewCast(terms); - return query.prepare(reader, order); + return query.prepare({ + .index = reader, + .scorers = order, + }); } case category_t::HighPhrase: // fall through case category_t::MedPhrase: // fall through @@ -277,7 +280,10 @@ irs::filter::prepared::ptr prepareFilter( opts->push_back().term = term->value; } - return query.prepare(reader, order); + return query.prepare({ + .index = reader, + .scorers = order, + }); } case category_t::HighNGram: // fall through case category_t::MedNGram: // fall through @@ -303,7 +309,10 @@ irs::filter::prepared::ptr prepareFilter( tmpBuf.size()); } } - return query.prepare(reader, order); + return query.prepare({ + .index = reader, + .scorers = order, + }); } case category_t::AndHighHigh: // fall through case category_t::AndHighMed: // fall through @@ -324,7 +333,10 @@ irs::filter::prepared::ptr prepareFilter( irs::ViewCast(std::string_view(tmpBuf.c_str() + 1)); } - return query.prepare(reader, order); + return query.prepare({ + .index = reader, + .scorers = order, + }); } case category_t::Or4High: case category_t::Or6High4Med2Low: @@ -345,7 +357,10 @@ irs::filter::prepared::ptr prepareFilter( irs::ViewCast(std::string_view{tmpBuf}); } - return query.prepare(reader, order); + return query.prepare({ + .index = reader, + .scorers = order, + }); } case category_t::Prefix3: { // cut '~' at the end of the text @@ -357,7 +372,10 @@ irs::filter::prepared::ptr prepareFilter( opts->scored_terms_limit = scored_terms_limit; opts->term = irs::ViewCast(terms); - return query.prepare(reader, order); + return query.prepare({ + .index = reader, + .scorers = order, + }); } case category_t::Wildcard: { terms = std::string_view(text.data(), text.size()); @@ -379,7 +397,10 @@ irs::filter::prepared::ptr prepareFilter( } } - return query.prepare(reader, order); + return query.prepare({ + .index = reader, + .scorers = order, + }); } case category_t::Fuzzy1: case category_t::Fuzzy2: { @@ -394,7 +415,10 @@ irs::filter::prepared::ptr prepareFilter( opts->max_distance = (category == category_t::Fuzzy1 ? 1 : 2); opts->term = irs::ViewCast(term); - return query.prepare(reader, order); + return query.prepare({ + .index = reader, + .scorers = order, + }); } case category_t::MinMatch2High2Med: { if (irs::IsNull(terms = splitFreq(text))) { @@ -415,7 +439,10 @@ irs::filter::prepared::ptr prepareFilter( irs::ViewCast(std::string_view{tmpBuf}); } } - return query.prepare(reader, order); + return query.prepare({ + .index = reader, + .scorers = order, + }); } default: return nullptr;