Skip to content
This repository has been archived by the owner on May 3, 2024. It is now read-only.

Wildcard preparation 2 #579

Merged
merged 2 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ set(IResearch_core_sources
./search/multiterm_query.cpp
./search/term_query.cpp
./search/boolean_filter.cpp
./search/boolean_query.cpp
./search/ngram_similarity_filter.cpp
./search/ngram_similarity_query.cpp
./search/proxy_filter.cpp
Expand Down
264 changes: 1 addition & 263 deletions core/search/boolean_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "exclusion.hpp"
#include "min_match_disjunction.hpp"
#include "prepared_state_visitor.hpp"
#include "search/boolean_query.hpp"

namespace {

Expand All @@ -45,273 +46,10 @@ std::pair<const irs::filter*, bool> optimize_not(const irs::Not& node) {
return std::make_pair(inner, neg);
}

// Executes every query in [begin, end) and collects the resulting document
// iterators into a `ScoreAdapters` collection.
//
// Conjunction - when true, the first sub-iterator that is already exhausted
//               (eof) makes the function return an empty collection at once;
//               when false, exhausted sub-iterators are skipped.
// ctx         - execution context passed to every query. Its `wand.root` flag
//               is cleared for a conjunction or when more than one query is
//               supplied.
// begin, end  - range of prepared queries; `begin <= end` is asserted.
//
// Returns the collected adapters. An empty collection is returned when the
// range is empty, when all sub-iterators are exhausted, or (conjunction only)
// when any sub-iterator is exhausted.
template<bool Conjunction, typename It>
irs::ScoreAdapters<irs::doc_iterator::ptr> MakeScoreAdapters(
  const irs::ExecutionContext& ctx, It begin, It end) {
  IRS_ASSERT(begin <= end);
  const size_t size = std::distance(begin, end);
  irs::ScoreAdapters<irs::doc_iterator::ptr> itrs;
  itrs.reserve(size);
  if (Conjunction || size > 1) {
    ctx.wand.root = false;
    // TODO(MBkkt) ctx.wand.strict = true;
    // We couldn't do this for few reasons:
    // 1. It's small chance that we will use just term iterator (or + eof)
    // 2. I'm not sure about precision
  }

  // The range is checked *before* the first dereference: the assertion above
  // allows `begin == end`, and a do/while loop would dereference `end` then.
  for (; begin != end; ++begin) {
    auto docs = (*begin)->execute(ctx);

    // filter out empty iterators
    if (irs::doc_limits::eof(docs->value())) {
      if constexpr (Conjunction) {
        // one empty operand makes the whole conjunction empty
        return {};
      } else {
        continue;
      }
    }

    itrs.emplace_back(std::move(docs));
  }

  return itrs;
}

// Builds a disjunction ("OR") iterator over the queries in [begin, end).
//
// The queries are executed with `ctx`; sub-iterators that are already
// exhausted are dropped. If the range is empty, or nothing is left after
// dropping exhausted sub-iterators, the empty iterator is returned.
// `merge_type` selects the score aggregator and any extra `args` are
// forwarded to `irs::MakeDisjunction`.
template<typename QueryIterator, typename... Args>
irs::doc_iterator::ptr make_disjunction(const irs::ExecutionContext& ctx,
                                        irs::ScoreMergeType merge_type,
                                        QueryIterator begin, QueryIterator end,
                                        Args&&... args) {
  IRS_ASSERT(begin <= end);

  // Bail out before executing anything: empty or unreachable search criteria.
  if (begin == end) {
    return irs::doc_iterator::empty();
  }

  auto adapters = MakeScoreAdapters<false>(ctx, begin, end);
  if (adapters.empty()) {
    return irs::doc_iterator::empty();
  }

  // Invoked by ResoveMergeType with the aggregator matching `merge_type`.
  auto build =
    [&]<typename Aggregator>(Aggregator&& aggregator) -> irs::doc_iterator::ptr {
    using disjunction_t =
      irs::disjunction_iterator<irs::doc_iterator::ptr, Aggregator>;

    return irs::MakeDisjunction<disjunction_t>(ctx.wand, std::move(adapters),
                                               std::move(aggregator),
                                               std::forward<Args>(args)...);
  };

  return irs::ResoveMergeType(merge_type, ctx.scorers.buckets().size(),
                              std::move(build));
}

// Builds a conjunction ("AND") iterator over the queries in [begin, end).
//
// An empty range yields the empty iterator and a single query is executed
// directly, without any wrapping. Otherwise all queries are executed with
// `ctx`; if any of them is already exhausted the empty iterator is returned.
// `merge_type` selects the score aggregator and any extra `args` are
// forwarded to `irs::MakeConjunction`.
template<typename QueryIterator, typename... Args>
irs::doc_iterator::ptr make_conjunction(const irs::ExecutionContext& ctx,
                                        irs::ScoreMergeType merge_type,
                                        QueryIterator begin, QueryIterator end,
                                        Args&&... args) {
  IRS_ASSERT(begin <= end);

  // Inspect the number of operands before executing anything.
  const auto count = std::distance(begin, end);
  if (count == 0) {
    return irs::doc_iterator::empty();
  }
  if (count == 1) {
    return (*begin)->execute(ctx);
  }

  auto adapters = MakeScoreAdapters<true>(ctx, begin, end);
  if (adapters.empty()) {
    return irs::doc_iterator::empty();
  }

  // Invoked by ResoveMergeType with the aggregator matching `merge_type`.
  auto build =
    [&]<typename Aggregator>(Aggregator&& aggregator) -> irs::doc_iterator::ptr {
    return irs::MakeConjunction(ctx.wand, std::move(aggregator),
                                std::move(adapters),
                                std::forward<Args>(args)...);
  };

  return irs::ResoveMergeType(merge_type, ctx.scorers.buckets().size(),
                              std::move(build));
}

} // namespace

namespace irs {

// Base class for boolean queries
class BooleanQuery : public filter::prepared {
public:
using queries_t = ManagedVector<filter::prepared::ptr>;
using iterator = queries_t::const_iterator;

BooleanQuery() noexcept : excl_{0} {}

doc_iterator::ptr execute(const ExecutionContext& ctx) const final {
if (empty()) {
return doc_iterator::empty();
}

IRS_ASSERT(excl_);
const auto excl_begin = this->excl_begin();
const auto end = this->end();

auto incl = execute(ctx, begin(), excl_begin);

if (excl_begin == end) {
return incl;
}

// exclusion part does not affect scoring at all
auto excl = ::make_disjunction(
{.segment = ctx.segment, .scorers = Scorers::kUnordered, .ctx = ctx.ctx},
irs::ScoreMergeType::kSum, excl_begin, end);

// got empty iterator for excluded
if (doc_limits::eof(excl->value())) {
// pure conjunction/disjunction
return incl;
}

return memory::make_managed<exclusion>(std::move(incl), std::move(excl));
}

void visit(const irs::SubReader& segment, irs::PreparedStateVisitor& visitor,
score_t boost) const final {
boost *= this->boost();

if (!visitor.Visit(*this, boost)) {
return;
}

// FIXME(gnusi): visit exclude group?
for (auto it = begin(), end = excl_begin(); it != end; ++it) {
(*it)->visit(segment, visitor, boost);
}
}

void prepare(const PrepareContext& ctx, ScoreMergeType merge_type,
std::span<const filter* const> incl,
std::span<const filter* const> excl) {
BooleanQuery::queries_t queries{{ctx.memory}};
queries.reserve(incl.size() + excl.size());

// apply boost to the current node
this->boost(ctx.boost);

// prepare included
for (const auto* filter : incl) {
queries.emplace_back(filter->prepare(ctx));
}

// prepare excluded
for (const auto* filter : excl) {
// exclusion part does not affect scoring at all
queries.emplace_back(filter->prepare({
.index = ctx.index,
.memory = ctx.memory,
.ctx = ctx.ctx,
}));
}

// nothrow block
queries_ = std::move(queries);
excl_ = incl.size();
merge_type_ = merge_type;
}

iterator begin() const { return queries_.begin(); }
iterator excl_begin() const { return begin() + excl_; }
iterator end() const { return queries_.end(); }

bool empty() const { return queries_.empty(); }
size_t size() const { return queries_.size(); }

protected:
virtual doc_iterator::ptr execute(const ExecutionContext& ctx, iterator begin,
iterator end) const = 0;

ScoreMergeType merge_type() const noexcept { return merge_type_; }

private:
// 0..excl_-1 - included queries
// excl_..queries.end() - excluded queries
queries_t queries_;
// index of the first excluded query
size_t excl_;
ScoreMergeType merge_type_{ScoreMergeType::kSum};
};

// A set of queries joined by "And": the included group is combined
// into a conjunction.
class AndQuery : public BooleanQuery {
 public:
  doc_iterator::ptr execute(const ExecutionContext& ctx, iterator begin,
                            iterator end) const final {
    const auto merge = merge_type();
    return ::make_conjunction(ctx, merge, begin, end);
  }
};

// A set of queries joined by "Or": the included group is combined
// into a disjunction.
class OrQuery : public BooleanQuery {
 public:
  doc_iterator::ptr execute(const ExecutionContext& ctx, iterator begin,
                            iterator end) const final {
    const auto merge = merge_type();
    return ::make_disjunction(ctx, merge, begin, end);
  }
};

// A set of queries joined by "Or" where a document has to satisfy at least
// `min_match_count` of the clauses to match.
class MinMatchQuery : public BooleanQuery {
 public:
  explicit MinMatchQuery(size_t min_match_count) noexcept
    : min_match_count_{min_match_count} {
    IRS_ASSERT(min_match_count_ > 1);
  }

  doc_iterator::ptr execute(const ExecutionContext& ctx, iterator begin,
                            iterator end) const final {
    const auto distance = std::distance(begin, end);
    IRS_ASSERT(distance >= 0);
    const auto size = static_cast<size_t>(distance);

    // never require fewer than one matching clause
    size_t required = std::max(size_t(1), min_match_count_);

    // decide as much as possible before executing any clause
    if (0 == size || required > size) {
      // empty or unreachable search criteria
      return doc_iterator::empty();
    }
    if (required == size) {
      // every clause has to match: pure conjunction
      return ::make_conjunction(ctx, merge_type(), begin, end);
    }

    // from here on: 1 <= required < size
    auto adapters = MakeScoreAdapters<false>(ctx, begin, end);
    if (adapters.empty()) {
      return irs::doc_iterator::empty();
    }

    // Invoked by ResoveMergeType with the aggregator matching merge_type().
    auto build =
      [&]<typename Aggregator>(Aggregator&& aggregator) -> doc_iterator::ptr {
      // FIXME(gnusi): use FAST version
      using disjunction_t = min_match_iterator<doc_iterator::ptr, Aggregator>;

      return MakeWeakDisjunction<disjunction_t, Aggregator>(
        ctx.wand, std::move(adapters), required, std::move(aggregator));
    };

    return ResoveMergeType(merge_type(), ctx.scorers.buckets().size(),
                           std::move(build));
  }

 private:
  size_t min_match_count_;
};

size_t boolean_filter::hash() const noexcept {
size_t seed = 0;

Expand Down
Loading