From 1bb161a12740a282c71ef80dec42a24323ec0de5 Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Thu, 30 Nov 2023 19:32:03 +0100 Subject: [PATCH 1/2] WIP --- core/CMakeLists.txt | 1 + core/search/boolean_filter.cpp | 264 +---------------- core/search/boolean_query.cpp | 248 ++++++++++++++++ core/search/boolean_query.hpp | 102 +++++++ core/search/column_existence_filter.cpp | 2 +- core/search/conjunction.hpp | 17 +- core/search/disjunction.hpp | 3 - core/search/min_match_disjunction.hpp | 237 ++++++++-------- core/search/multiterm_query.cpp | 3 +- core/search/ngram_similarity_filter.cpp | 6 +- core/search/ngram_similarity_filter.hpp | 6 +- core/search/ngram_similarity_query.cpp | 119 +++++--- core/search/ngram_similarity_query.hpp | 2 +- core/search/phrase_iterator.hpp | 9 +- core/search/phrase_query.cpp | 16 +- core/search/same_position_filter.cpp | 2 +- tests/search/boolean_filter_tests.cpp | 358 ++++++++++-------------- 17 files changed, 715 insertions(+), 680 deletions(-) create mode 100644 core/search/boolean_query.cpp create mode 100644 core/search/boolean_query.hpp diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 86b5a0ff9..4fb6100c2 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -132,6 +132,7 @@ set(IResearch_core_sources ./search/multiterm_query.cpp ./search/term_query.cpp ./search/boolean_filter.cpp + ./search/boolean_query.cpp ./search/ngram_similarity_filter.cpp ./search/ngram_similarity_query.cpp ./search/proxy_filter.cpp diff --git a/core/search/boolean_filter.cpp b/core/search/boolean_filter.cpp index 22a55565c..a3dc4a7c2 100644 --- a/core/search/boolean_filter.cpp +++ b/core/search/boolean_filter.cpp @@ -29,6 +29,7 @@ #include "exclusion.hpp" #include "min_match_disjunction.hpp" #include "prepared_state_visitor.hpp" +#include "search/boolean_query.hpp" namespace { @@ -45,273 +46,10 @@ std::pair optimize_not(const irs::Not& node) { return std::make_pair(inner, neg); } -template -irs::ScoreAdapters MakeScoreAdapters( - const irs::ExecutionContext& ctx, It begin, It end) { - IRS_ASSERT(begin <= end); - const size_t size = std::distance(begin, end); - irs::ScoreAdapters itrs; - itrs.reserve(size); - if (Conjunction || size > 1) { - ctx.wand.root = false; - // TODO(MBkkt) ctx.wand.strict = true; - // We couldn't do this for few reasons: - // 1. It's small chance that we will use just term iterator (or + eof) - // 2. I'm not sure about precision - } - do { - auto docs = (*begin)->execute(ctx); - ++begin; - - // filter out empty iterators - if (irs::doc_limits::eof(docs->value())) { - if constexpr (Conjunction) { - return {}; - } else { - continue; - } - } - - itrs.emplace_back(std::move(docs)); - } while (begin != end); - - return itrs; -} - -// Returns disjunction iterator created from the specified queries -template -irs::doc_iterator::ptr make_disjunction(const irs::ExecutionContext& ctx, - irs::ScoreMergeType merge_type, - QueryIterator begin, QueryIterator end, - Args&&... args) { - IRS_ASSERT(begin <= end); - const size_t size = std::distance(begin, end); - // check the size before the execution - if (0 == size) { - // empty or unreachable search criteria - return irs::doc_iterator::empty(); - } - - auto itrs = MakeScoreAdapters(ctx, begin, end); - if (itrs.empty()) { - return irs::doc_iterator::empty(); - } - - return irs::ResoveMergeType( - merge_type, ctx.scorers.buckets().size(), - [&](A&& aggregator) -> irs::doc_iterator::ptr { - using disjunction_t = - irs::disjunction_iterator; - - return irs::MakeDisjunction(ctx.wand, std::move(itrs), - std::move(aggregator), - std::forward(args)...); - }); -} - -// Returns conjunction iterator created from the specified queries -template -irs::doc_iterator::ptr make_conjunction(const irs::ExecutionContext& ctx, - irs::ScoreMergeType merge_type, - QueryIterator begin, QueryIterator end, - Args&&... args) { - IRS_ASSERT(begin <= end); - const size_t size = std::distance(begin, end); - // check size before the execution - switch (size) { - case 0: - return irs::doc_iterator::empty(); - case 1: - return (*begin)->execute(ctx); - } - - auto itrs = MakeScoreAdapters(ctx, begin, end); - if (itrs.empty()) { - return irs::doc_iterator::empty(); - } - - return irs::ResoveMergeType( - merge_type, ctx.scorers.buckets().size(), - [&](A&& aggregator) -> irs::doc_iterator::ptr { - return irs::MakeConjunction(ctx.wand, std::move(aggregator), - std::move(itrs), std::forward(args)...); - }); -} - } // namespace namespace irs { -// Base class for boolean queries -class BooleanQuery : public filter::prepared { - public: - using queries_t = ManagedVector; - using iterator = queries_t::const_iterator; - - BooleanQuery() noexcept : excl_{0} {} - - doc_iterator::ptr execute(const ExecutionContext& ctx) const final { - if (empty()) { - return doc_iterator::empty(); - } - - IRS_ASSERT(excl_); - const auto excl_begin = this->excl_begin(); - const auto end = this->end(); - - auto incl = execute(ctx, begin(), excl_begin); - - if (excl_begin == end) { - return incl; - } - - // exclusion part does not affect scoring at all - auto excl = ::make_disjunction( - {.segment = ctx.segment, .scorers = Scorers::kUnordered, .ctx = ctx.ctx}, - irs::ScoreMergeType::kSum, excl_begin, end); - - // got empty iterator for excluded - if (doc_limits::eof(excl->value())) { - // pure conjunction/disjunction - return incl; - } - - return memory::make_managed(std::move(incl), std::move(excl)); - } - - void visit(const irs::SubReader& segment, irs::PreparedStateVisitor& visitor, - score_t boost) const final { - boost *= this->boost(); - - if (!visitor.Visit(*this, boost)) { - return; - } - - // FIXME(gnusi): visit exclude group? - for (auto it = begin(), end = excl_begin(); it != end; ++it) { - (*it)->visit(segment, visitor, boost); - } - } - - void prepare(const PrepareContext& ctx, ScoreMergeType merge_type, - std::span incl, - std::span excl) { - BooleanQuery::queries_t queries{{ctx.memory}}; - queries.reserve(incl.size() + excl.size()); - - // apply boost to the current node - this->boost(ctx.boost); - - // prepare included - for (const auto* filter : incl) { - queries.emplace_back(filter->prepare(ctx)); - } - - // prepare excluded - for (const auto* filter : excl) { - // exclusion part does not affect scoring at all - queries.emplace_back(filter->prepare({ - .index = ctx.index, - .memory = ctx.memory, - .ctx = ctx.ctx, - })); - } - - // nothrow block - queries_ = std::move(queries); - excl_ = incl.size(); - merge_type_ = merge_type; - } - - iterator begin() const { return queries_.begin(); } - iterator excl_begin() const { return begin() + excl_; } - iterator end() const { return queries_.end(); } - - bool empty() const { return queries_.empty(); } - size_t size() const { return queries_.size(); } - - protected: - virtual doc_iterator::ptr execute(const ExecutionContext& ctx, iterator begin, - iterator end) const = 0; - - ScoreMergeType merge_type() const noexcept { return merge_type_; } - - private: - // 0..excl_-1 - included queries - // excl_..queries.end() - excluded queries - queries_t queries_; - // index of the first excluded query - size_t excl_; - ScoreMergeType merge_type_{ScoreMergeType::kSum}; -}; - -// Represent a set of queries joint by "And" -class AndQuery : public BooleanQuery { - public: - doc_iterator::ptr execute(const ExecutionContext& ctx, iterator begin, - iterator end) const final { - return ::make_conjunction(ctx, merge_type(), begin, end); - } -}; - -// Represent a set of queries joint by "Or" -class OrQuery : public BooleanQuery { - public: - doc_iterator::ptr execute(const ExecutionContext& ctx, iterator begin, - iterator end) const final { - return ::make_disjunction(ctx, merge_type(), begin, end); - } -}; - -// Represent a set of queries joint by "Or" with the specified -// minimum number of clauses that should satisfy criteria -class MinMatchQuery : public BooleanQuery { - public: - explicit MinMatchQuery(size_t min_match_count) noexcept - : min_match_count_{min_match_count} { - IRS_ASSERT(min_match_count_ > 1); - } - - doc_iterator::ptr execute(const ExecutionContext& ctx, iterator begin, - iterator end) const final { - IRS_ASSERT(std::distance(begin, end) >= 0); - const size_t size = size_t(std::distance(begin, end)); - - // 1 <= min_match_count - size_t min_match_count = std::max(size_t(1), min_match_count_); - - // check the size before the execution - if (0 == size || min_match_count > size) { - // empty or unreachable search criteria - return doc_iterator::empty(); - } else if (min_match_count == size) { - // pure conjunction - return ::make_conjunction(ctx, merge_type(), begin, end); - } - - // min_match_count <= size - min_match_count = std::min(size, min_match_count); - - auto itrs = MakeScoreAdapters(ctx, begin, end); - if (itrs.empty()) { - return irs::doc_iterator::empty(); - } - - return ResoveMergeType( - merge_type(), ctx.scorers.buckets().size(), - [&](A&& aggregator) -> doc_iterator::ptr { - // FIXME(gnusi): use FAST version - using disjunction_t = min_match_iterator; - - return MakeWeakDisjunction( - ctx.wand, std::move(itrs), min_match_count, std::move(aggregator)); - }); - } - - private: - size_t min_match_count_; -}; - size_t boolean_filter::hash() const noexcept { size_t seed = 0; diff --git a/core/search/boolean_query.cpp b/core/search/boolean_query.cpp new file mode 100644 index 000000000..f585c135a --- /dev/null +++ b/core/search/boolean_query.cpp @@ -0,0 +1,248 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2016 by EMC Corporation, All Rights Reserved +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is EMC Corporation +/// +/// @author Andrey Abramov +//////////////////////////////////////////////////////////////////////////////// + +#include "search/boolean_query.hpp" + +#include "search/conjunction.hpp" +#include "search/disjunction.hpp" +#include "search/prepared_state_visitor.hpp" + +namespace irs { +namespace { + +template +irs::ScoreAdapters MakeScoreAdapters(const irs::ExecutionContext& ctx, It begin, + It end) { + IRS_ASSERT(begin <= end); + const size_t size = std::distance(begin, end); + irs::ScoreAdapters itrs; + itrs.reserve(size); + if (Conjunction || size > 1) { + ctx.wand.root = false; + // TODO(MBkkt) ctx.wand.strict = true; + // We couldn't do this for few reasons: + // 1. It's small chance that we will use just term iterator (or + eof) + // 2. I'm not sure about precision + } + do { + auto docs = (*begin)->execute(ctx); + ++begin; + + // filter out empty iterators + if (irs::doc_limits::eof(docs->value())) { + if constexpr (Conjunction) { + return {}; + } else { + continue; + } + } + + itrs.emplace_back(std::move(docs)); + } while (begin != end); + + return itrs; +} + +// Returns disjunction iterator created from the specified queries +template +irs::doc_iterator::ptr make_disjunction(const irs::ExecutionContext& ctx, + irs::ScoreMergeType merge_type, + QueryIterator begin, QueryIterator end, + Args&&... args) { + IRS_ASSERT(begin <= end); + const size_t size = std::distance(begin, end); + // check the size before the execution + if (0 == size) { + // empty or unreachable search criteria + return irs::doc_iterator::empty(); + } + + auto itrs = MakeScoreAdapters(ctx, begin, end); + if (itrs.empty()) { + return irs::doc_iterator::empty(); + } + + return irs::ResoveMergeType( + merge_type, ctx.scorers.buckets().size(), + [&](A&& aggregator) -> irs::doc_iterator::ptr { + using disjunction_t = + irs::disjunction_iterator; + + return irs::MakeDisjunction(ctx.wand, std::move(itrs), + std::forward(aggregator), + std::forward(args)...); + }); +} + +// Returns conjunction iterator created from the specified queries +template +irs::doc_iterator::ptr make_conjunction(const irs::ExecutionContext& ctx, + irs::ScoreMergeType merge_type, + QueryIterator begin, QueryIterator end, + Args&&... args) { + IRS_ASSERT(begin <= end); + const size_t size = std::distance(begin, end); + // check size before the execution + switch (size) { + case 0: + return irs::doc_iterator::empty(); + case 1: + return (*begin)->execute(ctx); + } + + auto itrs = MakeScoreAdapters(ctx, begin, end); + if (itrs.empty()) { + return irs::doc_iterator::empty(); + } + + return irs::ResoveMergeType( + merge_type, ctx.scorers.buckets().size(), + [&](A&& aggregator) -> irs::doc_iterator::ptr { + return irs::MakeConjunction(ctx.wand, std::forward(aggregator), + std::move(itrs), std::forward(args)...); + }); +} + +} // namespace + +doc_iterator::ptr BooleanQuery::execute(const ExecutionContext& ctx) const { + if (empty()) { + return doc_iterator::empty(); + } + + IRS_ASSERT(excl_); + const auto excl_begin = this->excl_begin(); + const auto end = this->end(); + + auto incl = execute(ctx, begin(), excl_begin); + + if (excl_begin == end) { + return incl; + } + + // exclusion part does not affect scoring at all + auto excl = make_disjunction( + {.segment = ctx.segment, .scorers = Scorers::kUnordered, .ctx = ctx.ctx}, + irs::ScoreMergeType::kSum, excl_begin, end); + + // got empty iterator for excluded + if (doc_limits::eof(excl->value())) { + // pure conjunction/disjunction + return incl; + } + + return memory::make_managed(std::move(incl), std::move(excl)); +} + +void BooleanQuery::visit(const irs::SubReader& segment, + irs::PreparedStateVisitor& visitor, + score_t boost) const { + boost *= this->boost(); + + if (!visitor.Visit(*this, boost)) { + return; + } + + // FIXME(gnusi): visit exclude group? + for (auto it = begin(), end = excl_begin(); it != end; ++it) { + (*it)->visit(segment, visitor, boost); + } +} +void BooleanQuery::prepare(const PrepareContext& ctx, ScoreMergeType merge_type, + queries_t queries, size_t exclude_start) { + // apply boost to the current node + this->boost(ctx.boost); + // nothrow block + queries_ = std::move(queries); + excl_ = exclude_start; + merge_type_ = merge_type; +} + +void BooleanQuery::prepare(const PrepareContext& ctx, ScoreMergeType merge_type, + std::span incl, + std::span excl) { + queries_t queries{{ctx.memory}}; + queries.reserve(incl.size() + excl.size()); + // prepare included + for (const auto* filter : incl) { + queries.emplace_back(filter->prepare(ctx)); + } + // prepare excluded + for (const auto* filter : excl) { + // exclusion part does not affect scoring at all + queries.emplace_back(filter->prepare({ + .index = ctx.index, + .memory = ctx.memory, + .ctx = ctx.ctx, + })); + } + prepare(ctx, merge_type, std::move(queries), incl.size()); +} + +doc_iterator::ptr AndQuery::execute(const ExecutionContext& ctx, iterator begin, + iterator end) const { + return make_conjunction(ctx, merge_type(), begin, end); +} + +doc_iterator::ptr OrQuery::execute(const ExecutionContext& ctx, iterator begin, + iterator end) const { + return make_disjunction(ctx, merge_type(), begin, end); +} + +doc_iterator::ptr MinMatchQuery::execute(const ExecutionContext& ctx, + iterator begin, iterator end) const { + IRS_ASSERT(std::distance(begin, end) >= 0); + const auto size = size_t(std::distance(begin, end)); + + // 1 <= min_match_count + size_t min_match_count = std::max(size_t(1), min_match_count_); + + // check the size before the execution + if (0 == size || min_match_count > size) { + // empty or unreachable search criteria + return doc_iterator::empty(); + } else if (min_match_count == size) { + // pure conjunction + return make_conjunction(ctx, merge_type(), begin, end); + } + + // min_match_count <= size + min_match_count = std::min(size, min_match_count); + + auto itrs = MakeScoreAdapters(ctx, begin, end); + if (itrs.empty()) { + return irs::doc_iterator::empty(); + } + + return ResoveMergeType(merge_type(), ctx.scorers.buckets().size(), + [&](A&& aggregator) -> doc_iterator::ptr { + // FIXME(gnusi): use FAST version + using disjunction_t = + min_match_iterator; + + return MakeWeakDisjunction( + ctx.wand, std::move(itrs), min_match_count, + std::forward(aggregator)); + }); +} + +} // namespace irs diff --git a/core/search/boolean_query.hpp b/core/search/boolean_query.hpp new file mode 100644 index 000000000..ac7740f48 --- /dev/null +++ b/core/search/boolean_query.hpp @@ -0,0 +1,102 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2016 by EMC Corporation, All Rights Reserved +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is EMC Corporation +/// +/// @author Andrey Abramov +//////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include + +#include "search/exclusion.hpp" +#include "search/filter.hpp" + +namespace irs { + +// Base class for boolean queries +class BooleanQuery : public filter::prepared { + public: + using queries_t = ManagedVector; + using iterator = queries_t::const_iterator; + + doc_iterator::ptr execute(const ExecutionContext& ctx) const final; + + void visit(const irs::SubReader& segment, irs::PreparedStateVisitor& visitor, + score_t boost) const final; + + void prepare(const PrepareContext& ctx, ScoreMergeType merge_type, + queries_t queries, size_t exclude_start); + + void prepare(const PrepareContext& ctx, ScoreMergeType merge_type, + std::span incl, + std::span excl); + + iterator begin() const { return queries_.begin(); } + iterator excl_begin() const { return begin() + excl_; } + iterator end() const { return queries_.end(); } + + bool empty() const { return queries_.empty(); } + size_t size() const { return queries_.size(); } + + protected: + virtual doc_iterator::ptr execute(const ExecutionContext& ctx, iterator begin, + iterator end) const = 0; + + ScoreMergeType merge_type() const noexcept { return merge_type_; } + + private: + // 0..excl_-1 - included queries + // excl_..queries.end() - excluded queries + queries_t queries_; + // index of the first excluded query + size_t excl_{0}; + ScoreMergeType merge_type_{ScoreMergeType::kSum}; +}; + +// Represent a set of queries joint by "And" +class AndQuery : public BooleanQuery { + public: + doc_iterator::ptr execute(const ExecutionContext& ctx, iterator begin, + iterator end) const final; +}; + +// Represent a set of queries joint by "Or" +class OrQuery : public BooleanQuery { + public: + doc_iterator::ptr execute(const ExecutionContext& ctx, iterator begin, + iterator end) const final; +}; + +// Represent a set of queries joint by "Or" with the specified +// minimum number of clauses that should satisfy criteria +class MinMatchQuery : public BooleanQuery { + public: + explicit MinMatchQuery(size_t min_match_count) noexcept + : min_match_count_{min_match_count} { + IRS_ASSERT(min_match_count_ > 1); + } + + doc_iterator::ptr execute(const ExecutionContext& ctx, iterator begin, + iterator end) const final; + + private: + size_t min_match_count_; +}; + +} // namespace irs diff --git a/core/search/column_existence_filter.cpp b/core/search/column_existence_filter.cpp index 33b763f80..1a154ae0b 100644 --- a/core/search/column_existence_filter.cpp +++ b/core/search/column_existence_filter.cpp @@ -83,7 +83,7 @@ class column_prefix_existence_query : public column_existence_query { } irs::doc_iterator::ptr execute(const ExecutionContext& ctx) const final { - using adapter_t = irs::ScoreAdapter; + using adapter_t = irs::ScoreAdapter<>; IRS_ASSERT(acceptor_); diff --git a/core/search/conjunction.hpp b/core/search/conjunction.hpp index 73c7f1563..4b4f425e2 100644 --- a/core/search/conjunction.hpp +++ b/core/search/conjunction.hpp @@ -32,7 +32,7 @@ namespace irs { // Adapter to use DocIterator with conjunction and disjunction. -template +template struct ScoreAdapter { ScoreAdapter() noexcept = default; ScoreAdapter(DocIterator&& it) noexcept @@ -67,8 +67,7 @@ struct ScoreAdapter { const irs::score* score{}; }; -template -using ScoreAdapters = std::vector>; +using ScoreAdapters = std::vector>; // Helpers template @@ -92,10 +91,8 @@ template struct ConjunctionBase : public doc_iterator, protected Merger, protected score_ctx { - using merger_type = Merger; - protected: - explicit ConjunctionBase(Merger&& merger, ScoreAdapters&& itrs, + explicit ConjunctionBase(Merger&& merger, std::vector&& itrs, std::vector&& scorers) : Merger{std::move(merger)}, itrs_{std::move(itrs)}, @@ -150,7 +147,7 @@ struct ConjunctionBase : public doc_iterator, auto end() const noexcept { return itrs_.end(); } size_t size() const noexcept { return itrs_.size(); } - ScoreAdapters itrs_; + std::vector itrs_; std::vector scores_; }; @@ -161,7 +158,7 @@ class Conjunction : public ConjunctionBase { std::tuple, attribute_ptr, score>; public: - explicit Conjunction(Merger&& merger, ScoreAdapters&& itrs, + explicit Conjunction(Merger&& merger, std::vector&& itrs, std::vector&& scores = {}) : Base{std::move(merger), std::move(itrs), std::move(scores)}, front_{this->itrs_.front().it.get()} { @@ -236,7 +233,7 @@ class BlockConjunction : public ConjunctionBase { using Attributes = std::tuple, irs::score>; public: - explicit BlockConjunction(Merger&& merger, ScoreAdapters&& itrs, + explicit BlockConjunction(Merger&& merger, std::vector&& itrs, SubScores&& scores, bool strict) : Base{std::move(merger), std::move(itrs), std::move(scores.scores)}, sum_scores_{scores.sum_score}, @@ -439,7 +436,7 @@ class BlockConjunction : public ConjunctionBase { template typename Wrapper = EmptyWrapper, typename Merger, typename DocIterator, typename... Args> doc_iterator::ptr MakeConjunction(WandContext ctx, Merger&& merger, - ScoreAdapters&& itrs, + std::vector&& itrs, Args&&... args) { if (const auto size = itrs.size(); 0 == size) { // empty or unreachable search criteria diff --git a/core/search/disjunction.hpp b/core/search/disjunction.hpp index ab0f52f82..bfb9e7025 100644 --- a/core/search/disjunction.hpp +++ b/core/search/disjunction.hpp @@ -208,7 +208,6 @@ class basic_disjunction : public compound_doc_iterator, private score_ctx { public: using adapter = Adapter; - using merger_type = Merger; basic_disjunction(adapter&& lhs, adapter&& rhs, Merger&& merger = Merger{}) : basic_disjunction{std::move(lhs), std::move(rhs), std::move(merger), @@ -613,7 +612,6 @@ class disjunction : public compound_doc_iterator, private score_ctx { public: using adapter = Adapter; - using merger_type = Merger; using doc_iterators_t = std::vector; using heap_container = std::vector; using heap_iterator = heap_container::iterator; @@ -896,7 +894,6 @@ class block_disjunction : public doc_iterator, public: using traits_type = Traits; using adapter = Adapter; - using merger_type = Merger; using doc_iterators_t = std::vector; block_disjunction(doc_iterators_t&& itrs, Merger&& merger, cost::cost_t est) diff --git a/core/search/min_match_disjunction.hpp b/core/search/min_match_disjunction.hpp index 3f0e613fe..3edcc2241 100644 --- a/core/search/min_match_disjunction.hpp +++ b/core/search/min_match_disjunction.hpp @@ -26,6 +26,22 @@ namespace irs { +template +struct CostAdapter : ScoreAdapter { + explicit CostAdapter(DocIterator&& it) noexcept + : ScoreAdapter{std::move(it)} { + // TODO(MBkkt) 0 instead of kMax? + est = cost::extract(*this->it, cost::kMax); + } + + CostAdapter(CostAdapter&&) noexcept = default; + CostAdapter& operator=(CostAdapter&&) noexcept = default; + + cost::cost_t est{}; +}; + +using CostAdapters = std::vector>; + // Heapsort-based "weak and" iterator // ----------------------------------------------------------------------------- // [0] <-- begin @@ -38,59 +54,39 @@ namespace irs { // s | ... | // t | [n] <-- end // ----------------------------------------------------------------------------- -template -class min_match_disjunction : public doc_iterator, - private Merger, - private score_ctx { +template +class MinMatchDisjunction : public doc_iterator, + protected Merger, + protected score_ctx { public: - struct cost_iterator_adapter : ScoreAdapter { - cost_iterator_adapter(irs::doc_iterator::ptr&& it) noexcept - : ScoreAdapter(std::move(it)) { - est = cost::extract(*this->it, cost::kMax); - } - - cost_iterator_adapter(cost_iterator_adapter&&) = default; - cost_iterator_adapter& operator=(cost_iterator_adapter&&) = default; - - cost::cost_t est; - }; - - static_assert(std::is_nothrow_move_constructible_v); - - typedef cost_iterator_adapter doc_iterator_t; - typedef std::vector doc_iterators_t; - - min_match_disjunction(doc_iterators_t&& itrs, size_t min_match_count, - Merger&& merger = Merger{}) + MinMatchDisjunction(CostAdapters&& itrs, size_t min_match_count, + Merger&& merger = {}) : Merger{std::move(merger)}, - itrs_(std::move(itrs)), - min_match_count_( - std::min(itrs_.size(), std::max(size_t(1), min_match_count))), - lead_(itrs_.size()) { + itrs_{std::move(itrs)}, + min_match_count_{std::clamp(min_match_count, size_t{1}, itrs_.size())}, + lead_{itrs_.size()} { IRS_ASSERT(!itrs_.empty()); IRS_ASSERT(min_match_count_ >= 1 && min_match_count_ <= itrs_.size()); // sort subnodes in ascending order by their cost - std::sort( - itrs_.begin(), itrs_.end(), - [](const doc_iterator_t& lhs, const doc_iterator_t& rhs) noexcept { - return cost::extract(lhs, 0) < cost::extract(rhs, 0); - }); + std::sort(itrs_.begin(), itrs_.end(), + [](const auto& lhs, const auto& rhs) noexcept { + return lhs.est < rhs.est; + }); std::get(attrs_).reset([this]() noexcept { - return std::accumulate( - itrs_.begin(), itrs_.end(), cost::cost_t(0), - [](cost::cost_t lhs, const doc_iterator_t& rhs) noexcept { - return lhs + cost::extract(rhs, 0); - }); + return std::accumulate(itrs_.begin(), itrs_.end(), cost::cost_t{0}, + [](cost::cost_t lhs, const auto& rhs) noexcept { + return lhs + rhs.est; + }); }); // prepare external heap heap_.resize(itrs_.size()); - std::iota(heap_.begin(), heap_.end(), size_t(0)); + std::iota(heap_.begin(), heap_.end(), size_t{0}); if constexpr (HasScore_v) { - prepare_score(); + PrepareScore(); } } @@ -101,63 +97,58 @@ class min_match_disjunction : public doc_iterator, doc_id_t value() const final { return std::get(attrs_).value; } bool next() final { - auto& doc_ = std::get(attrs_); + auto& doc_value = std::get(attrs_).value; - if (doc_limits::eof(doc_.value)) { + if (doc_limits::eof(doc_value)) { return false; } - while (check_size()) { + while (CheckSize()) { // start next iteration. execute next for all lead iterators // and move them to head - if (!pop_lead()) { - doc_.value = doc_limits::eof(); + if (!PopLead()) { + doc_value = doc_limits::eof(); return false; } // make step for all head iterators less or equal current doc (doc_) - while (top().value() <= doc_.value) { - const bool exhausted = top().value() == doc_.value - ? !top()->next() - : doc_limits::eof(top()->seek(doc_.value + 1)); + while (Top().value() <= doc_value) { + const bool exhausted = Top().value() == doc_value + ? !Top()->next() + : doc_limits::eof(Top()->seek(doc_value + 1)); - if (exhausted && !remove_top()) { - doc_.value = doc_limits::eof(); + if (exhausted && !RemoveTop()) { + doc_value = doc_limits::eof(); return false; - } else { - refresh_top(); } + RefreshTop(); } // count equal iterators - const auto top = this->top().value(); + const auto top = Top().value(); do { - add_lead(); + AddLead(); if (lead_ >= min_match_count_) { - return !doc_limits::eof(doc_.value = top); + return !doc_limits::eof(doc_value = top); } - } while (top == this->top().value()); + } while (top == Top().value()); } - doc_.value = doc_limits::eof(); + doc_value = doc_limits::eof(); return false; } doc_id_t seek(doc_id_t target) final { - auto& doc_ = std::get(attrs_); - - if (target <= doc_.value) { - return doc_.value; - } + auto& doc_value = std::get(attrs_).value; - if (doc_limits::eof(doc_.value)) { - return doc_.value; + if (target <= doc_value) { + return doc_value; } // execute seek for all lead iterators and // move one to head if it doesn't hit the target - for (auto it = lead(), end = heap_.end(); it != end;) { + for (auto it = Lead(), end = heap_.end(); it != end;) { IRS_ASSERT(*it < itrs_.size()); const auto doc = itrs_[*it]->seek(target); @@ -165,16 +156,16 @@ class min_match_disjunction : public doc_iterator, --lead_; // iterator exhausted - if (!remove_lead(it)) { - return (doc_.value = doc_limits::eof()); + if (!RemoveLead(it)) { + return doc_value = doc_limits::eof(); } - it = lead(); + it = Lead(); end = heap_.end(); } else { if (doc != target) { // move back to head - push_head(it); + PushHead(it); --lead_; } ++it; @@ -183,36 +174,36 @@ class min_match_disjunction : public doc_iterator, // check if we still satisfy search criteria if (lead_ >= min_match_count_) { - return doc_.value = target; + return doc_value = target; } // main search loop - for (;; target = top().value()) { - while (top().value() <= target) { - const auto doc = top()->seek(target); + for (;; target = Top().value()) { + while (Top().value() <= target) { + const auto doc = Top()->seek(target); if (doc_limits::eof(doc)) { // iterator exhausted - if (!remove_top()) { - return (doc_.value = doc_limits::eof()); + if (!RemoveTop()) { + return doc_value = doc_limits::eof(); } } else if (doc == target) { // valid iterator, doc == target - add_lead(); + AddLead(); if (lead_ >= min_match_count_) { - return (doc_.value = target); + return doc_value = target; } } else { // invalid iterator, doc != target - refresh_top(); + RefreshTop(); } } // can't find enough iterators equal to target here. // start next iteration. execute next for all lead iterators // and move them to head - if (!pop_lead()) { - return doc_.value = doc_limits::eof(); + if (!PopLead()) { + return doc_value = doc_limits::eof(); } } } @@ -222,27 +213,27 @@ class min_match_disjunction : public doc_iterator, // to current matched document after this call. // Returns total matched iterators count. size_t match_count() { - push_valid_to_lead(); + PushValidToLead(); return lead_; } private: - using attributes = std::tuple; + using Attributes = std::tuple; - void prepare_score() { + void PrepareScore() { IRS_ASSERT(Merger::size()); auto& score = std::get(attrs_); score.Reset(*this, [](score_ctx* ctx, score_t* res) noexcept { - auto& self = *static_cast(ctx); + auto& self = static_cast(*ctx); IRS_ASSERT(!self.heap_.empty()); - self.push_valid_to_lead(); + self.PushValidToLead(); // score lead iterators std::memset(res, 0, static_cast(self).byte_size()); - std::for_each(self.lead(), self.heap_.end(), [&self, res](size_t it) { + std::for_each(self.Lead(), self.heap_.end(), [&self, res](size_t it) { IRS_ASSERT(it < self.itrs_.size()); if (auto& score = *self.itrs_[it].score; !score.IsDefault()) { auto& merger = static_cast(self); @@ -254,30 +245,30 @@ class min_match_disjunction : public doc_iterator, } // Push all valid iterators to lead. - void push_valid_to_lead() { - auto& doc_ = std::get(attrs_); + void PushValidToLead() { + auto& doc_value = std::get(attrs_).value; - for (auto lead = this->lead(), begin = heap_.begin(); - lead != begin && top().value() <= doc_.value;) { + for (auto lead = Lead(), begin = heap_.begin(); + lead != begin && Top().value() <= doc_value;) { // hitch head - if (top().value() == doc_.value) { + if (Top().value() == doc_value) { // got hit here - add_lead(); + AddLead(); --lead; } else { - if (doc_limits::eof(top()->seek(doc_.value))) { + if (doc_limits::eof(Top()->seek(doc_value))) { // iterator exhausted - remove_top(); - lead = this->lead(); + RemoveTop(); + lead = Lead(); } else { - refresh_top(); + RefreshTop(); } } } } template - void push(Iterator begin, Iterator end) noexcept { + void Push(Iterator begin, Iterator end) noexcept { // lambda here gives ~20% speedup on GCC std::push_heap(begin, end, [this](const size_t lhs, const size_t rhs) noexcept { @@ -293,7 +284,7 @@ class min_match_disjunction : public doc_iterator, } template - void pop(Iterator begin, Iterator end) noexcept { + void Pop(Iterator begin, Iterator end) noexcept { // lambda here gives ~20% speedup on GCC detail::pop_heap(begin, end, [this](const size_t lhs, const size_t rhs) noexcept { @@ -311,22 +302,22 @@ class min_match_disjunction : public doc_iterator, // Performs a step for each iterator in lead group and pushes it to the head. // Returns true - if the min_match_count_ condition still can be satisfied, // false - otherwise - bool pop_lead() { - for (auto it = lead(), end = heap_.end(); it != end;) { + bool PopLead() { + for (auto it = Lead(), end = heap_.end(); it != end;) { IRS_ASSERT(*it < itrs_.size()); if (!itrs_[*it]->next()) { --lead_; // remove iterator - if (!remove_lead(it)) { + if (!RemoveLead(it)) { return false; } - it = lead(); + it = Lead(); end = heap_.end(); } else { // push back to head - push(heap_.begin(), ++it); + Push(heap_.begin(), ++it); --lead_; } } @@ -339,71 +330,71 @@ class min_match_disjunction : public doc_iterator, // Returns true - if the min_match_count_ condition still can be satisfied, // false - otherwise. template - bool remove_lead(Iterator it) noexcept { + bool RemoveLead(Iterator it) noexcept { if (&*it != &heap_.back()) { std::swap(*it, heap_.back()); } heap_.pop_back(); - return check_size(); + return CheckSize(); } // Removes iterator from the top of the head without moving // iterators after the specified iterator. // Returns true - if the min_match_count_ condition still can be satisfied, // false - otherwise. - bool remove_top() noexcept { - auto lead = this->lead(); - pop(heap_.begin(), lead); - return remove_lead(--lead); + bool RemoveTop() noexcept { + auto lead = Lead(); + Pop(heap_.begin(), lead); + return RemoveLead(--lead); } // Refresh the value of the top of the head. - void refresh_top() noexcept { - auto lead = this->lead(); - pop(heap_.begin(), lead); - push(heap_.begin(), lead); + void RefreshTop() noexcept { + auto lead = Lead(); + Pop(heap_.begin(), lead); + Push(heap_.begin(), lead); } // Push the specified iterator from lead group to the head // without movinh iterators after the specified iterator. template - void push_head(Iterator it) noexcept { - Iterator lead = this->lead(); + void PushHead(Iterator it) noexcept { + Iterator lead = Lead(); if (it != lead) { std::swap(*it, *lead); } ++lead; - push(heap_.begin(), lead); + Push(heap_.begin(), lead); } // Returns true - if the min_match_count_ condition still can be satisfied, // false - otherwise. - bool check_size() const noexcept { return heap_.size() >= min_match_count_; } + bool CheckSize() const noexcept { return heap_.size() >= min_match_count_; } // Returns reference to the top of the head - doc_iterator_t& top() noexcept { + auto& Top() noexcept { IRS_ASSERT(!heap_.empty()); IRS_ASSERT(heap_.front() < itrs_.size()); return itrs_[heap_.front()]; } // Returns the first iterator in the lead group - auto lead() noexcept { + auto Lead() noexcept { IRS_ASSERT(lead_ <= heap_.size()); return heap_.end() - lead_; } // Adds iterator to the lead group - void add_lead() { - pop(heap_.begin(), lead()); + void AddLead() { + Pop(heap_.begin(), Lead()); ++lead_; } - doc_iterators_t itrs_; // sub iterators + CostAdapters itrs_; // sub iterators std::vector heap_; size_t min_match_count_; // minimum number of hits size_t lead_; // number of iterators in lead group - attributes attrs_; + Attributes attrs_; }; } // namespace irs diff --git a/core/search/multiterm_query.cpp b/core/search/multiterm_query.cpp index f64f53d0d..7afe4787b 100644 --- a/core/search/multiterm_query.cpp +++ b/core/search/multiterm_query.cpp @@ -132,8 +132,7 @@ doc_iterator::ptr MultiTermQuery::execute(const ExecutionContext& ctx) const { const bool has_unscored_terms = !state->unscored_terms.empty(); - ScoreAdapters itrs(state->scored_states.size() + - size_t(has_unscored_terms)); + ScoreAdapters itrs(state->scored_states.size() + size_t(has_unscored_terms)); auto it = std::begin(itrs); // add an iterator for each of the scored states diff --git a/core/search/ngram_similarity_filter.cpp b/core/search/ngram_similarity_filter.cpp index be54d5ee1..e2d370ed4 100644 --- a/core/search/ngram_similarity_filter.cpp +++ b/core/search/ngram_similarity_filter.cpp @@ -33,15 +33,13 @@ namespace irs { filter::prepared::ptr by_ngram_similarity::Prepare( const PrepareContext& ctx, std::string_view field_name, - const options_type& options) { - const auto& ngrams = options.ngrams; - + const std::vector& ngrams, float_t threshold) { if (ngrams.empty() || field_name.empty()) { // empty field or terms or invalid threshold return filter::prepared::empty(); } - const auto threshold = std::clamp(options.threshold, 0.f, 1.f); + threshold = std::clamp(threshold, 0.f, 1.f); const auto min_match_count = std::clamp(static_cast(std::ceil(ngrams.size() * threshold)), size_t{1}, ngrams.size()); diff --git a/core/search/ngram_similarity_filter.hpp b/core/search/ngram_similarity_filter.hpp index 6980bd860..310309979 100644 --- a/core/search/ngram_similarity_filter.hpp +++ b/core/search/ngram_similarity_filter.hpp @@ -53,10 +53,12 @@ class by_ngram_similarity : public filter_base { public: static prepared::ptr Prepare(const PrepareContext& ctx, std::string_view field_name, - const options_type& options); + const std::vector& ngrams, + float_t threshold); prepared::ptr prepare(const PrepareContext& ctx) const final { - return Prepare(ctx.Boost(boost()), field(), options()); + return Prepare(ctx.Boost(boost()), field(), options().ngrams, + options().threshold); } }; diff --git a/core/search/ngram_similarity_query.cpp b/core/search/ngram_similarity_query.cpp index 6981bdc06..85d5249b3 100644 --- a/core/search/ngram_similarity_query.cpp +++ b/core/search/ngram_similarity_query.cpp @@ -91,9 +91,37 @@ struct SearchState { }; using SearchStates = - std::map, std::greater>; + std::map, std::greater<>>; -using NGramApprox = min_match_disjunction; +template +class NGramApprox : public MinMatchDisjunction { + using Base = MinMatchDisjunction; + + public: + using Base::Base; +}; + +template<> +class NGramApprox : public Conjunction, NoopAggregator> { + using Base = Conjunction, NoopAggregator>; + + public: + NGramApprox(CostAdapters&& itrs, size_t min_match_count) + : Base{NoopAggregator{}, + [](auto&& itrs) { + std::sort(itrs.begin(), itrs.end(), + [](const auto& lhs, const auto& rhs) noexcept { + return lhs.est < rhs.est; + }); + return std::move(itrs); + }(std::move(itrs))}, + match_count_{min_match_count} {} + + size_t match_count() const noexcept { return match_count_; } + + private: + size_t match_count_; +}; struct Dummy {}; @@ -427,16 +455,15 @@ bool SerialPositionsChecker::Check(size_t potential, doc_id_t doc) { return longest_sequence_len >= min_match_count_; } -template +template class NGramSimilarityDocIterator : public doc_iterator, private score_ctx { public: - NGramSimilarityDocIterator(NGramApprox::doc_iterators_t&& itrs, - size_t total_terms_count, size_t min_match_count, - bool collect_all_states) + NGramSimilarityDocIterator(CostAdapters&& itrs, size_t total_terms_count, + size_t min_match_count, bool collect_all_states) : checker_{std::begin(itrs), std::end(itrs), total_terms_count, min_match_count, collect_all_states}, // we are not interested in disjunction`s // scoring - approx_(std::move(itrs), min_match_count, NoopAggregator{}) { + approx_{std::move(itrs), min_match_count} { // avoid runtime conversion std::get>(attrs_) = irs::get_mutable(&approx_); @@ -445,10 +472,9 @@ class NGramSimilarityDocIterator : public doc_iterator, private score_ctx { std::get>(attrs_) = irs::get_mutable(&approx_); } - NGramSimilarityDocIterator(NGramApprox::doc_iterators_t&& itrs, - const SubReader& segment, const term_reader& field, - score_t boost, const byte_type* stats, - size_t total_terms_count, + NGramSimilarityDocIterator(CostAdapters&& itrs, const SubReader& segment, + const term_reader& field, score_t boost, + const byte_type* stats, size_t total_terms_count, size_t min_match_count = 1, const Scorers& ord = Scorers::kUnordered) : NGramSimilarityDocIterator{std::move(itrs), total_terms_count, @@ -462,7 +488,7 @@ class NGramSimilarityDocIterator : public doc_iterator, private score_ctx { attribute* get_mutable(type_info::type_id type) noexcept final { auto* attr = irs::get_mutable(attrs_, type); - return attr ? attr : checker_.GetMutable(type); + return attr != nullptr ? attr : checker_.GetMutable(type); } bool next() final { @@ -479,47 +505,47 @@ class NGramSimilarityDocIterator : public doc_iterator, private score_ctx { } doc_id_t seek(doc_id_t target) final { - auto* doc_ = std::get>(attrs_).ptr; + auto* doc = std::get>(attrs_).ptr; - if (doc_->value >= target) { - return doc_->value; + if (doc->value >= target) { + return doc->value; } - const auto doc = approx_.seek(target); + const auto doc_id = approx_.seek(target); - if (doc_limits::eof(doc) || - checker_.Check(approx_.match_count(), doc_->value)) { - return doc; + if (doc_limits::eof(doc_id) || + checker_.Check(approx_.match_count(), doc->value)) { + return doc_id; } next(); - return doc_->value; + return doc->value; } private: - using attributes = + using Attributes = std::tuple, attribute_ptr, score>; Checker checker_; - NGramApprox approx_; - attributes attrs_; + Approx approx_; + Attributes attrs_; }; -NGramApprox::doc_iterators_t Execute(const NGramState& query_state, - IndexFeatures required_features, - IndexFeatures extra_features) { - auto* field = query_state.field; +CostAdapters Execute(const NGramState& query_state, + IndexFeatures required_features, + IndexFeatures extra_features) { + const auto* field = query_state.field; - if (!field || + if (field == nullptr || required_features != (field->meta().index_features & required_features)) { return {}; } required_features |= extra_features; - NGramApprox::doc_iterators_t itrs; + CostAdapters itrs; itrs.reserve(query_state.terms.size()); - for (auto& term_state : query_state.terms) { + for (const auto& term_state : query_state.terms) { if (IRS_UNLIKELY(term_state == nullptr)) { continue; } @@ -540,13 +566,13 @@ NGramApprox::doc_iterators_t Execute(const NGramState& query_state, doc_iterator::ptr NGramSimilarityQuery::execute( const ExecutionContext& ctx) const { - auto& ord = ctx.scorers; + const auto& ord = ctx.scorers; IRS_ASSERT(1 != min_match_count_ || !ord.empty()); - auto& segment = ctx.segment; - auto query_state = states_.find(segment); + const auto& segment = ctx.segment; + const auto* query_state = states_.find(segment); - if (!query_state) { + if (query_state == nullptr) { return doc_iterator::empty(); } @@ -555,18 +581,23 @@ doc_iterator::ptr NGramSimilarityQuery::execute( if (itrs.size() < min_match_count_) { return doc_iterator::empty(); } - - return memory::make_managed< - NGramSimilarityDocIterator>>( + if (itrs.size() == min_match_count_) { + return memory::make_managed, SerialPositionsChecker>>( + std::move(itrs), segment, *query_state->field, boost(), stats_.c_str(), + query_state->terms.size(), min_match_count_, ord); + } + return memory::make_managed, SerialPositionsChecker>>( std::move(itrs), segment, *query_state->field, boost(), stats_.c_str(), query_state->terms.size(), min_match_count_, ord); } doc_iterator::ptr NGramSimilarityQuery::ExecuteWithOffsets( const SubReader& rdr) const { - auto query_state = states_.find(rdr); + const auto* query_state = states_.find(rdr); - if (!query_state) { + if (query_state == nullptr) { return doc_iterator::empty(); } @@ -576,9 +607,13 @@ doc_iterator::ptr NGramSimilarityQuery::ExecuteWithOffsets( if (itrs.size() < min_match_count_) { return doc_iterator::empty(); } - - return memory::make_managed< - NGramSimilarityDocIterator>>( + if (itrs.size() == min_match_count_) { + return memory::make_managed, SerialPositionsChecker>>( + std::move(itrs), query_state->terms.size(), min_match_count_, true); + } + return memory::make_managed, SerialPositionsChecker>>( std::move(itrs), query_state->terms.size(), min_match_count_, true); } diff --git a/core/search/ngram_similarity_query.hpp b/core/search/ngram_similarity_query.hpp index c4bdf8747..69767a406 100644 --- a/core/search/ngram_similarity_query.hpp +++ b/core/search/ngram_similarity_query.hpp @@ -50,7 +50,7 @@ class NGramSimilarityQuery : public filter::prepared { void visit(const SubReader& segment, PreparedStateVisitor& visitor, score_t boost) const final { - if (auto* state = states_.find(segment); state) { + if (const auto* state = states_.find(segment); state) { visitor.Visit(*this, *state, boost * this->boost()); } } diff --git a/core/search/phrase_iterator.hpp b/core/search/phrase_iterator.hpp index c9c764e9a..bd6f519d2 100644 --- a/core/search/phrase_iterator.hpp +++ b/core/search/phrase_iterator.hpp @@ -165,10 +165,10 @@ class FixedPhraseFrequency { }; // Adapter to use doc_iterator with positions for disjunction -struct VariadicPhraseAdapter : ScoreAdapter { +struct VariadicPhraseAdapter : ScoreAdapter<> { VariadicPhraseAdapter() = default; VariadicPhraseAdapter(doc_iterator::ptr&& it, score_t boost) noexcept - : ScoreAdapter(std::move(it)), + : ScoreAdapter<>(std::move(it)), position(irs::get_mutable(this->it.get())), boost(boost) {} @@ -549,8 +549,7 @@ class PhraseIterator : public doc_iterator { public: using TermPosition = typename Frequency::TermPosition; - PhraseIterator(ScoreAdapters&& itrs, - std::vector&& pos) + PhraseIterator(ScoreAdapters&& itrs, std::vector&& pos) : approx_{NoopAggregator{}, [](auto&& itrs) { std::sort(itrs.begin(), itrs.end(), @@ -569,7 +568,7 @@ class PhraseIterator : public doc_iterator { irs::get_mutable(&approx_); } - PhraseIterator(ScoreAdapters&& itrs, + PhraseIterator(ScoreAdapters&& itrs, std::vector&& pos, const SubReader& segment, const term_reader& field, const byte_type* stats, const Scorers& ord, score_t boost) diff --git a/core/search/phrase_query.cpp b/core/search/phrase_query.cpp index c44ae8ebb..661f09451 100644 --- a/core/search/phrase_query.cpp +++ b/core/search/phrase_query.cpp @@ -35,13 +35,13 @@ constexpr IndexFeatures kRequireOffs = template using FixedPhraseIterator = - PhraseIterator, + PhraseIterator, NoopAggregator>, FixedPhraseFrequency>; // FIXME add proper handling of overlapped case template using VariadicPhraseIterator = PhraseIterator< - Conjunction, + Conjunction, NoopAggregator>, VariadicPhraseFrequency>; } // namespace @@ -69,7 +69,7 @@ doc_iterator::ptr FixedPhraseQuery::execute(const ExecutionContext& ctx) const { // get index features required for query & order const IndexFeatures features = ord.features() | kRequiredFeatures; - ScoreAdapters itrs; + ScoreAdapters itrs; itrs.reserve(phrase_state->terms.size()); std::vector positions; @@ -113,7 +113,7 @@ doc_iterator::ptr FixedPhraseQuery::execute(const ExecutionContext& ctx) const { doc_iterator::ptr FixedPhraseQuery::ExecuteWithOffsets( const SubReader& rdr) const { using FixedPhraseIterator = - PhraseIterator, + PhraseIterator, NoopAggregator>, PhrasePosition>>; // get phrase state for the specified reader @@ -124,7 +124,7 @@ doc_iterator::ptr FixedPhraseQuery::ExecuteWithOffsets( return doc_iterator::empty(); } - ScoreAdapters itrs; + ScoreAdapters itrs; itrs.reserve(phrase_state->terms.size()); std::vector positions; @@ -223,7 +223,7 @@ doc_iterator::ptr VariadicPhraseQuery::execute( // get features required for query & order const IndexFeatures features = ord.features() | kRequiredFeatures; - ScoreAdapters conj_itrs; + ScoreAdapters conj_itrs; conj_itrs.reserve(phrase_state->terms.size()); const auto phrase_size = phrase_state->num_terms.size(); @@ -297,7 +297,7 @@ doc_iterator::ptr VariadicPhraseQuery::ExecuteWithOffsets( const irs::SubReader& rdr) const { using Adapter = VariadicPhraseOffsetAdapter; using FixedPhraseIterator = PhraseIterator< - Conjunction, + Conjunction, NoopAggregator>, PhrasePosition>>; using CompundDocIterator = irs::compound_doc_iterator; using Disjunction = disjunction; @@ -310,7 +310,7 @@ doc_iterator::ptr VariadicPhraseQuery::ExecuteWithOffsets( return doc_iterator::empty(); } - ScoreAdapters conj_itrs; + ScoreAdapters conj_itrs; conj_itrs.reserve(phrase_state->terms.size()); const auto phrase_size = phrase_state->num_terms.size(); diff --git a/core/search/same_position_filter.cpp b/core/search/same_position_filter.cpp index 20cdc43f2..b329dfd28 100644 --- a/core/search/same_position_filter.cpp +++ b/core/search/same_position_filter.cpp @@ -131,7 +131,7 @@ class same_position_query : public filter::prepared { const IndexFeatures features = ord.features() | by_same_position::kRequiredFeatures; - ScoreAdapters itrs; + ScoreAdapters itrs; itrs.reserve(query_state->size()); std::vector positions; diff --git a/tests/search/boolean_filter_tests.cpp b/tests/search/boolean_filter_tests.cpp index 341a3e9cd..1de2c8621 100644 --- a/tests/search/boolean_filter_tests.cpp +++ b/tests/search/boolean_filter_tests.cpp @@ -12439,8 +12439,7 @@ TEST(disjunction_test, scored_seek_next) { // ---------------------------------------------------------------------------- TEST(min_match_disjunction_test, next) { - using disjunction = - irs::min_match_disjunction; + using disjunction = irs::MinMatchDisjunction; // single dataset { std::vector> docs{ @@ -12451,9 +12450,8 @@ TEST(min_match_disjunction_test, next) { const size_t min_match_count = 0; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12470,9 +12468,8 @@ TEST(min_match_disjunction_test, next) { const size_t min_match_count = 1; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12490,9 +12487,8 @@ TEST(min_match_disjunction_test, next) { std::vector expected{}; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12510,9 +12506,8 @@ TEST(min_match_disjunction_test, next) { std::vector expected{}; std::vector result; { - disjunction it{ - detail::execute_all(docs), - min_match_count}; + disjunction it{detail::execute_all>(docs), + min_match_count}; auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12530,9 +12525,8 @@ TEST(min_match_disjunction_test, next) { std::vector expected{}; std::vector result; { - disjunction it{ - detail::execute_all(docs), - min_match_count}; + disjunction it{detail::execute_all>(docs), + min_match_count}; auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); @@ -12561,9 +12555,8 @@ TEST(min_match_disjunction_test, next) { std::vector expected = detail::union_all(docs); std::vector result; { - disjunction it{ - detail::execute_all(docs), - min_match_count}; + disjunction it{detail::execute_all>(docs), + min_match_count}; auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12581,9 +12574,8 @@ TEST(min_match_disjunction_test, next) { std::vector expected = detail::union_all(docs); std::vector result; { - disjunction it{ - detail::execute_all(docs), - min_match_count}; + disjunction it{detail::execute_all>(docs), + min_match_count}; auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12601,9 +12593,8 @@ TEST(min_match_disjunction_test, next) { std::vector expected{7}; std::vector result; { - disjunction it{ - detail::execute_all(docs), - min_match_count}; + disjunction it{detail::execute_all>(docs), + min_match_count}; auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12621,9 +12612,8 @@ TEST(min_match_disjunction_test, next) { std::vector expected; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12642,9 +12632,8 @@ TEST(min_match_disjunction_test, next) { std::vector expected; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12663,9 +12652,8 @@ TEST(min_match_disjunction_test, next) { std::vector expected{}; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12684,9 +12672,8 @@ TEST(min_match_disjunction_test, next) { std::vector expected{}; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12715,9 +12702,8 @@ TEST(min_match_disjunction_test, next) { 9, 11, 12, 13, 29, 45}; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12736,9 +12722,8 @@ TEST(min_match_disjunction_test, next) { 9, 11, 12, 13, 29, 45}; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12756,9 +12741,8 @@ TEST(min_match_disjunction_test, next) { std::vector expected{1, 2, 5, 6, 29}; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12776,9 +12760,8 @@ TEST(min_match_disjunction_test, next) { std::vector expected{1, 5}; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12797,9 +12780,8 @@ TEST(min_match_disjunction_test, next) { std::vector expected{1, 5}; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12818,9 +12800,8 @@ TEST(min_match_disjunction_test, next) { std::vector expected{1, 5}; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12839,9 +12820,8 @@ TEST(min_match_disjunction_test, next) { std::vector expected{1, 5}; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12869,9 +12849,8 @@ TEST(min_match_disjunction_test, next) { const size_t min_match_count = 0; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12889,9 +12868,8 @@ TEST(min_match_disjunction_test, next) { const size_t min_match_count = 1; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12908,9 +12886,8 @@ TEST(min_match_disjunction_test, next) { const size_t min_match_count = 2; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12927,9 +12904,8 @@ TEST(min_match_disjunction_test, next) { const size_t min_match_count = 3; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12947,9 +12923,8 @@ TEST(min_match_disjunction_test, next) { const size_t min_match_count = 4; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12967,9 +12942,8 @@ TEST(min_match_disjunction_test, next) { const size_t min_match_count = 5; std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -12987,9 +12961,8 @@ TEST(min_match_disjunction_test, next) { const size_t min_match_count = std::numeric_limits::max(); std::vector result; { - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -13013,8 +12986,7 @@ TEST(min_match_disjunction_test, next) { { std::vector result; { - disjunction it( - detail::execute_all(docs), 0U); + disjunction it(detail::execute_all>(docs), 0U); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -13030,8 +13002,7 @@ TEST(min_match_disjunction_test, next) { { std::vector result; { - disjunction it( - detail::execute_all(docs), 1U); + disjunction it(detail::execute_all>(docs), 1U); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -13047,9 +13018,8 @@ TEST(min_match_disjunction_test, next) { { std::vector result; { - disjunction it( - detail::execute_all(docs), - std::numeric_limits::max()); + disjunction it(detail::execute_all>(docs), + std::numeric_limits::max()); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); ASSERT_EQ(irs::doc_limits::invalid(), it.value()); @@ -13066,8 +13036,7 @@ TEST(min_match_disjunction_test, next) { } TEST(min_match_disjunction_test, seek) { - using disjunction = - irs::min_match_disjunction; + using disjunction = irs::MinMatchDisjunction; // simple case { @@ -13088,9 +13057,8 @@ TEST(min_match_disjunction_test, seek) { {45, 45}, {57, irs::doc_limits::eof()}}; - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13112,9 +13080,8 @@ TEST(min_match_disjunction_test, seek) { {45, 45}, {57, irs::doc_limits::eof()}}; - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13134,9 +13101,8 @@ TEST(min_match_disjunction_test, seek) { {29, 29}, {45, irs::doc_limits::eof()}}; - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13152,9 +13118,8 @@ TEST(min_match_disjunction_test, seek) { {1, 1}, {6, irs::doc_limits::eof()}}; - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13170,9 +13135,8 @@ TEST(min_match_disjunction_test, seek) { {1, 1}, {6, irs::doc_limits::eof()}}; - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13205,9 +13169,8 @@ TEST(min_match_disjunction_test, seek) { {513, 1025}, {2001, irs::doc_limits::eof()}}; - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13230,9 +13193,8 @@ TEST(min_match_disjunction_test, seek) { {513, 1025}, {2001, irs::doc_limits::eof()}}; - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13252,9 +13214,8 @@ TEST(min_match_disjunction_test, seek) { {101, 101}, {513, irs::doc_limits::eof()}}; - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13270,9 +13231,8 @@ TEST(min_match_disjunction_test, seek) { {1, 1}, {6, irs::doc_limits::eof()}}; - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13289,9 +13249,8 @@ TEST(min_match_disjunction_test, seek) { {1, irs::doc_limits::eof()}, {6, irs::doc_limits::eof()}}; - disjunction it( - detail::execute_all(docs), - min_match_count); + disjunction it(detail::execute_all>(docs), + min_match_count); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13311,8 +13270,7 @@ TEST(min_match_disjunction_test, seek) { {irs::doc_limits::invalid(), irs::doc_limits::eof()}}; { - disjunction it( - detail::execute_all(docs), 0U); + disjunction it(detail::execute_all>(docs), 0U); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13321,8 +13279,7 @@ TEST(min_match_disjunction_test, seek) { } { - disjunction it( - detail::execute_all(docs), 1U); + disjunction it(detail::execute_all>(docs), 1U); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13331,9 +13288,8 @@ TEST(min_match_disjunction_test, seek) { } { - disjunction it( - detail::execute_all(docs), - std::numeric_limits::max()); + disjunction it(detail::execute_all>(docs), + std::numeric_limits::max()); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13362,8 +13318,7 @@ TEST(min_match_disjunction_test, seek) { {57, irs::doc_limits::eof()}}; { - disjunction it( - detail::execute_all(docs), 0U); + disjunction it(detail::execute_all>(docs), 0U); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13372,8 +13327,7 @@ TEST(min_match_disjunction_test, seek) { } { - disjunction it( - detail::execute_all(docs), 1U); + disjunction it(detail::execute_all>(docs), 1U); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13382,8 +13336,7 @@ TEST(min_match_disjunction_test, seek) { } { - disjunction it( - detail::execute_all(docs), 2U); + disjunction it(detail::execute_all>(docs), 2U); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13392,9 +13345,8 @@ TEST(min_match_disjunction_test, seek) { } { - disjunction it( - detail::execute_all(docs), - std::numeric_limits::max()); + disjunction it(detail::execute_all>(docs), + std::numeric_limits::max()); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13424,8 +13376,7 @@ TEST(min_match_disjunction_test, seek) { {1201, irs::doc_limits::eof()}}; { - disjunction it( - detail::execute_all(docs), 0U); + disjunction it(detail::execute_all>(docs), 0U); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13434,8 +13385,7 @@ TEST(min_match_disjunction_test, seek) { } { - disjunction it( - detail::execute_all(docs), 1U); + disjunction it(detail::execute_all>(docs), 1U); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13452,8 +13402,7 @@ TEST(min_match_disjunction_test, seek) { {12, irs::doc_limits::eof()}}; { - disjunction it( - detail::execute_all(docs), 2U); + disjunction it(detail::execute_all>(docs), 2U); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13470,8 +13419,7 @@ TEST(min_match_disjunction_test, seek) { }; { - disjunction it( - detail::execute_all(docs), 3U); + disjunction it(detail::execute_all>(docs), 3U); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13489,8 +13437,7 @@ TEST(min_match_disjunction_test, seek) { }; { - disjunction it( - detail::execute_all(docs), 5U); + disjunction it(detail::execute_all>(docs), 5U); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13499,9 +13446,8 @@ TEST(min_match_disjunction_test, seek) { } { - disjunction it( - detail::execute_all(docs), - std::numeric_limits::max()); + disjunction it(detail::execute_all>(docs), + std::numeric_limits::max()); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); for (const auto& target : expected) { @@ -13513,15 +13459,13 @@ TEST(min_match_disjunction_test, seek) { } TEST(min_match_disjunction_test, seek_next) { - using disjunction = - irs::min_match_disjunction; + using disjunction = irs::MinMatchDisjunction; { std::vector> docs{ {1, 2, 5, 7, 9, 11, 45}, {1, 5, 6, 12, 29}, {1, 5, 6, 9, 29}}; - disjunction it( - detail::execute_all(docs), 2U); + disjunction it(detail::execute_all>(docs), 2U); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); @@ -13557,15 +13501,13 @@ TEST(min_match_disjunction_test, seek_next) { } TEST(min_match_disjunction_test, match_count) { - using disjunction = - irs::min_match_disjunction; + using disjunction = irs::MinMatchDisjunction; { std::vector> docs{ {1, 3}, {1, 2, 3, 4}, {1, 3, 4}, {1, 3, 4}}; - disjunction it( - detail::execute_all(docs), 1U); + disjunction it(detail::execute_all>(docs), 1U); auto* doc = irs::get(it); ASSERT_TRUE(bool(doc)); @@ -13605,9 +13547,8 @@ TEST(min_match_disjunction_test, scored_seek_next) { auto it_ptr = irs::ResoveMergeType( irs::ScoreMergeType::kMax, 0, [&](A&& aggregator) -> irs::doc_iterator::ptr { - using disjunction = - irs::min_match_disjunction; - using adapter = typename disjunction::cost_iterator_adapter; + using disjunction = irs::MinMatchDisjunction; + using adapter = typename irs::CostAdapter<>; auto res = detail::execute_all(docs); @@ -13615,8 +13556,7 @@ TEST(min_match_disjunction_test, scored_seek_next) { std::move(aggregator)); }); - using ExpectedType = - irs::min_match_disjunction; + using ExpectedType = irs::MinMatchDisjunction; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); @@ -13663,9 +13603,8 @@ TEST(min_match_disjunction_test, scored_seek_next) { auto it_ptr = irs::ResoveMergeType( irs::ScoreMergeType::kSum, 1, [&](A&& aggregator) -> irs::doc_iterator::ptr { - using disjunction = - irs::min_match_disjunction; - using adapter = typename disjunction::cost_iterator_adapter; + using disjunction = irs::MinMatchDisjunction; + using adapter = typename irs::CostAdapter<>; auto res = detail::execute_all(docs); @@ -13674,8 +13613,7 @@ TEST(min_match_disjunction_test, scored_seek_next) { }); using ExpectedType = - irs::min_match_disjunction>; + irs::MinMatchDisjunction>; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); @@ -13735,9 +13673,8 @@ TEST(min_match_disjunction_test, scored_seek_next) { auto it_ptr = irs::ResoveMergeType( irs::ScoreMergeType::kMax, 1, [&](A&& aggregator) -> irs::doc_iterator::ptr { - using disjunction = - irs::min_match_disjunction; - using adapter = typename disjunction::cost_iterator_adapter; + using disjunction = irs::MinMatchDisjunction; + using adapter = typename irs::CostAdapter<>; auto res = detail::execute_all(docs); @@ -13746,8 +13683,7 @@ TEST(min_match_disjunction_test, scored_seek_next) { }); using ExpectedType = - irs::min_match_disjunction>; + irs::MinMatchDisjunction>; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); @@ -13806,9 +13742,8 @@ TEST(min_match_disjunction_test, scored_seek_next) { auto it_ptr = irs::ResoveMergeType( irs::ScoreMergeType::kSum, 1, [&](A&& aggregator) -> irs::doc_iterator::ptr { - using disjunction = - irs::min_match_disjunction; - using adapter = typename disjunction::cost_iterator_adapter; + using disjunction = irs::MinMatchDisjunction; + using adapter = typename irs::CostAdapter<>; auto res = detail::execute_all(docs); @@ -13817,8 +13752,7 @@ TEST(min_match_disjunction_test, scored_seek_next) { }); using ExpectedType = - irs::min_match_disjunction>; + irs::MinMatchDisjunction>; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); @@ -13880,9 +13814,8 @@ TEST(min_match_disjunction_test, scored_seek_next) { auto it_ptr = irs::ResoveMergeType( irs::ScoreMergeType::kMax, 1, [&](A&& aggregator) -> irs::doc_iterator::ptr { - using disjunction = - irs::min_match_disjunction; - using adapter = typename disjunction::cost_iterator_adapter; + using disjunction = irs::MinMatchDisjunction; + using adapter = typename irs::CostAdapter<>; auto res = detail::execute_all(docs); @@ -13891,8 +13824,7 @@ TEST(min_match_disjunction_test, scored_seek_next) { }); using ExpectedType = - irs::min_match_disjunction>; + irs::MinMatchDisjunction>; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); @@ -13948,9 +13880,8 @@ TEST(min_match_disjunction_test, scored_seek_next) { auto it_ptr = irs::ResoveMergeType( irs::ScoreMergeType::kSum, 1, [&](A&& aggregator) -> irs::doc_iterator::ptr { - using disjunction = - irs::min_match_disjunction; - using adapter = typename disjunction::cost_iterator_adapter; + using disjunction = irs::MinMatchDisjunction; + using adapter = typename irs::CostAdapter<>; auto res = detail::execute_all(docs); @@ -13959,8 +13890,7 @@ TEST(min_match_disjunction_test, scored_seek_next) { }); using ExpectedType = - irs::min_match_disjunction>; + irs::MinMatchDisjunction>; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); @@ -14016,9 +13946,8 @@ TEST(min_match_disjunction_test, scored_seek_next) { auto it_ptr = irs::ResoveMergeType( irs::ScoreMergeType::kMax, 1, [&](A&& aggregator) -> irs::doc_iterator::ptr { - using disjunction = - irs::min_match_disjunction; - using adapter = typename disjunction::cost_iterator_adapter; + using disjunction = irs::MinMatchDisjunction; + using adapter = typename irs::CostAdapter<>; auto res = detail::execute_all(docs); @@ -14027,8 +13956,7 @@ TEST(min_match_disjunction_test, scored_seek_next) { }); using ExpectedType = - irs::min_match_disjunction>; + irs::MinMatchDisjunction>; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); @@ -14075,7 +14003,7 @@ TEST(min_match_disjunction_test, scored_seek_next) { // ---------------------------------------------------------------------------- // --SECTION-- iterator0 AND iterator1 AND iterator2 AND ... // ---------------------------------------------------------------------------- -using DocIterator = irs::ScoreAdapter; +using DocIterator = irs::ScoreAdapter<>; TEST(conjunction_test, next) { auto shortest = [](const std::vector& lhs, @@ -14514,8 +14442,8 @@ TEST(conjunction_test, scored_seek_next) { return irs::MakeConjunction({}, std::move(aggregator), std::move(res)); }); - using ExpectedType = irs::Conjunction>; + using ExpectedType = + irs::Conjunction, irs::Aggregator>; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); @@ -14579,7 +14507,7 @@ TEST(conjunction_test, scored_seek_next) { }); using ExpectedType = - irs::Conjunction; + irs::Conjunction, irs::NoopAggregator>; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); @@ -14633,8 +14561,8 @@ TEST(conjunction_test, scored_seek_next) { return irs::MakeConjunction({}, std::move(aggregator), std::move(res)); }); - using ExpectedType = irs::Conjunction>; + using ExpectedType = + irs::Conjunction, irs::Aggregator>; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); @@ -14697,8 +14625,8 @@ TEST(conjunction_test, scored_seek_next) { return irs::MakeConjunction({}, std::move(aggregator), std::move(res)); }); - using ExpectedType = irs::Conjunction>; + using ExpectedType = + irs::Conjunction, irs::Aggregator>; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); @@ -14761,8 +14689,8 @@ TEST(conjunction_test, scored_seek_next) { return irs::MakeConjunction({}, std::move(aggregator), std::move(res)); }); - using ExpectedType = irs::Conjunction>; + using ExpectedType = + irs::Conjunction, irs::Aggregator>; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); @@ -14825,8 +14753,8 @@ TEST(conjunction_test, scored_seek_next) { return irs::MakeConjunction({}, std::move(aggregator), std::move(res)); }); - using ExpectedType = irs::Conjunction>; + using ExpectedType = + irs::Conjunction, irs::Aggregator>; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); @@ -14887,8 +14815,8 @@ TEST(conjunction_test, scored_seek_next) { return irs::MakeConjunction({}, std::move(aggregator), std::move(res)); }); - using ExpectedType = irs::Conjunction>; + using ExpectedType = + irs::Conjunction, irs::Aggregator>; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); @@ -14949,8 +14877,8 @@ TEST(conjunction_test, scored_seek_next) { return irs::MakeConjunction({}, std::move(aggregator), std::move(res)); }); - using ExpectedType = irs::Conjunction>; + using ExpectedType = + irs::Conjunction, irs::Aggregator>; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); @@ -15012,8 +14940,8 @@ TEST(conjunction_test, scored_seek_next) { return irs::MakeConjunction({}, std::move(aggregator), std::move(res)); }); - using ExpectedType = irs::Conjunction>; + using ExpectedType = + irs::Conjunction, irs::Aggregator>; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); @@ -15075,8 +15003,8 @@ TEST(conjunction_test, scored_seek_next) { return irs::MakeConjunction({}, std::move(aggregator), std::move(res)); }); - using ExpectedType = irs::Conjunction>; + using ExpectedType = + irs::Conjunction, irs::Aggregator>; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); @@ -15135,8 +15063,8 @@ TEST(conjunction_test, scored_seek_next) { return irs::MakeConjunction({}, std::move(aggregator), std::move(res)); }); - using ExpectedType = irs::Conjunction>; + using ExpectedType = + irs::Conjunction, irs::Aggregator>; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); @@ -15195,8 +15123,8 @@ TEST(conjunction_test, scored_seek_next) { return irs::MakeConjunction({}, std::move(aggregator), std::move(res)); }); - using ExpectedType = irs::Conjunction>; + using ExpectedType = + irs::Conjunction, irs::Aggregator>; ASSERT_NE(nullptr, dynamic_cast(it_ptr.get())); auto& it = dynamic_cast(*it_ptr); From 8bb8f613d834d324c5efdcb38bd279bc6b568424 Mon Sep 17 00:00:00 2001 From: Valery Mironov <32071355+MBkkt@users.noreply.github.com> Date: Fri, 1 Dec 2023 13:02:02 +0100 Subject: [PATCH 2/2] WIP --- core/search/ngram_similarity_query.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/core/search/ngram_similarity_query.cpp b/core/search/ngram_similarity_query.cpp index 85d5249b3..39d1f39a5 100644 --- a/core/search/ngram_similarity_query.cpp +++ b/core/search/ngram_similarity_query.cpp @@ -581,12 +581,16 @@ doc_iterator::ptr NGramSimilarityQuery::execute( if (itrs.size() < min_match_count_) { return doc_iterator::empty(); } + // TODO(MBkkt) itrs.size() == 1: return itrs_[0], but needs to add score + // optimization for single ngram case if (itrs.size() == min_match_count_) { return memory::make_managed, SerialPositionsChecker>>( std::move(itrs), segment, *query_state->field, boost(), stats_.c_str(), query_state->terms.size(), min_match_count_, ord); } + // TODO(MBkkt) min_match_count_ == 1: disjunction for approx, + // optimization for low threshold case return memory::make_managed, SerialPositionsChecker>>( std::move(itrs), segment, *query_state->field, boost(), stats_.c_str(), @@ -607,11 +611,15 @@ doc_iterator::ptr NGramSimilarityQuery::ExecuteWithOffsets( if (itrs.size() < min_match_count_) { return doc_iterator::empty(); } + // TODO(MBkkt) itrs.size() == 1: return itrs_[0], but needs to add score + // optimization for single ngram case if (itrs.size() == min_match_count_) { return memory::make_managed, SerialPositionsChecker>>( std::move(itrs), query_state->terms.size(), min_match_count_, true); } + // TODO(MBkkt) min_match_count_ == 1: disjunction for approx, + // optimization for low threshold case return memory::make_managed, SerialPositionsChecker>>( std::move(itrs), query_state->terms.size(), min_match_count_, true);