Skip to content
This repository has been archived by the owner on May 3, 2024. It is now read-only.

Wildcard preparation 2 #579

Merged
merged 2 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ set(IResearch_core_sources
./search/multiterm_query.cpp
./search/term_query.cpp
./search/boolean_filter.cpp
./search/boolean_query.cpp
./search/ngram_similarity_filter.cpp
./search/ngram_similarity_query.cpp
./search/proxy_filter.cpp
Expand Down
264 changes: 1 addition & 263 deletions core/search/boolean_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "exclusion.hpp"
#include "min_match_disjunction.hpp"
#include "prepared_state_visitor.hpp"
#include "search/boolean_query.hpp"

namespace {

Expand All @@ -45,273 +46,10 @@ std::pair<const irs::filter*, bool> optimize_not(const irs::Not& node) {
return std::make_pair(inner, neg);
}

// Executes every prepared query in [begin, end) against `ctx` and collects
// the resulting document iterators.
//
// Iterators that are already positioned at EOF right after execution are
// treated according to `Conjunction`:
//  - true:  one exhausted operand makes the whole conjunction empty, so an
//           empty collection is returned immediately;
//  - false: the exhausted operand is dropped and the remaining ones are kept.
//
// An empty input range produces an empty collection (no query is executed).
template<bool Conjunction, typename It>
irs::ScoreAdapters<irs::doc_iterator::ptr> MakeScoreAdapters(
  const irs::ExecutionContext& ctx, It begin, It end) {
  IRS_ASSERT(begin <= end);
  const size_t size = std::distance(begin, end);
  irs::ScoreAdapters<irs::doc_iterator::ptr> itrs;
  itrs.reserve(size);
  if (Conjunction || size > 1) {
    // NOTE(review): presumably the operands will be wrapped by a composite
    // iterator and therefore must not act as the WAND root - confirm against
    // the WandContext definition.
    ctx.wand.root = false;
    // TODO(MBkkt) ctx.wand.strict = true;
    // We can't do this yet for a few reasons:
    // 1. There is only a small chance that we end up with just a term
    //    iterator (or term + eof)
    // 2. The impact on precision is unclear
  }

  // The range is checked before the first dereference: a post-checked
  // do/while would read `*end` when the range is empty.
  for (; begin != end; ++begin) {
    auto docs = (*begin)->execute(ctx);

    // filter out empty iterators
    if (irs::doc_limits::eof(docs->value())) {
      if constexpr (Conjunction) {
        return {};
      } else {
        continue;
      }
    }

    itrs.emplace_back(std::move(docs));
  }

  return itrs;
}

// Builds a disjunction ("Or") iterator over the prepared queries in
// [begin, end).
//
// Returns an empty iterator when the range is empty or when every query
// produced an already exhausted iterator. Any extra `args` are forwarded to
// irs::MakeDisjunction unchanged.
template<typename QueryIterator, typename... Args>
irs::doc_iterator::ptr make_disjunction(const irs::ExecutionContext& ctx,
                                        irs::ScoreMergeType merge_type,
                                        QueryIterator begin, QueryIterator end,
                                        Args&&... args) {
  IRS_ASSERT(begin <= end);

  // Nothing to execute: empty or unreachable search criteria
  if (begin == end) {
    return irs::doc_iterator::empty();
  }

  // Exhausted operands are dropped here, so the collection may end up empty
  auto adapters = MakeScoreAdapters<false>(ctx, begin, end);
  if (adapters.empty()) {
    return irs::doc_iterator::empty();
  }

  const size_t num_buckets = ctx.scorers.buckets().size();

  // The aggregator type is selected from `merge_type` and the bucket count
  return irs::ResoveMergeType(
    merge_type, num_buckets,
    [&]<typename Aggregator>(
      Aggregator&& aggregator) -> irs::doc_iterator::ptr {
      using Disjunction =
        irs::disjunction_iterator<irs::doc_iterator::ptr, Aggregator>;

      return irs::MakeDisjunction<Disjunction>(
        ctx.wand, std::move(adapters), std::move(aggregator),
        std::forward<Args>(args)...);
    });
}

// Builds a conjunction ("And") iterator over the prepared queries in
// [begin, end).
//
// An empty range yields an empty iterator; a single query is executed
// directly without any wrapper. Otherwise all queries are executed and, if
// any of them is already exhausted, the result is an empty iterator. Extra
// `args` are forwarded to irs::MakeConjunction unchanged.
template<typename QueryIterator, typename... Args>
irs::doc_iterator::ptr make_conjunction(const irs::ExecutionContext& ctx,
                                        irs::ScoreMergeType merge_type,
                                        QueryIterator begin, QueryIterator end,
                                        Args&&... args) {
  IRS_ASSERT(begin <= end);
  const size_t count = std::distance(begin, end);

  // Trivial cases are resolved before anything is executed
  if (count == 0) {
    return irs::doc_iterator::empty();
  }
  if (count == 1) {
    return (*begin)->execute(ctx);
  }

  // An empty collection means at least one operand was exhausted
  auto adapters = MakeScoreAdapters<true>(ctx, begin, end);
  if (adapters.empty()) {
    return irs::doc_iterator::empty();
  }

  const size_t num_buckets = ctx.scorers.buckets().size();

  // The aggregator type is selected from `merge_type` and the bucket count
  return irs::ResoveMergeType(
    merge_type, num_buckets,
    [&]<typename Aggregator>(
      Aggregator&& aggregator) -> irs::doc_iterator::ptr {
      return irs::MakeConjunction(ctx.wand, std::move(aggregator),
                                  std::move(adapters),
                                  std::forward<Args>(args)...);
    });
}

} // namespace

namespace irs {

// Base class for boolean queries
class BooleanQuery : public filter::prepared {
public:
using queries_t = ManagedVector<filter::prepared::ptr>;
using iterator = queries_t::const_iterator;

BooleanQuery() noexcept : excl_{0} {}

doc_iterator::ptr execute(const ExecutionContext& ctx) const final {
if (empty()) {
return doc_iterator::empty();
}

IRS_ASSERT(excl_);
const auto excl_begin = this->excl_begin();
const auto end = this->end();

auto incl = execute(ctx, begin(), excl_begin);

if (excl_begin == end) {
return incl;
}

// exclusion part does not affect scoring at all
auto excl = ::make_disjunction(
{.segment = ctx.segment, .scorers = Scorers::kUnordered, .ctx = ctx.ctx},
irs::ScoreMergeType::kSum, excl_begin, end);

// got empty iterator for excluded
if (doc_limits::eof(excl->value())) {
// pure conjunction/disjunction
return incl;
}

return memory::make_managed<exclusion>(std::move(incl), std::move(excl));
}

void visit(const irs::SubReader& segment, irs::PreparedStateVisitor& visitor,
score_t boost) const final {
boost *= this->boost();

if (!visitor.Visit(*this, boost)) {
return;
}

// FIXME(gnusi): visit exclude group?
for (auto it = begin(), end = excl_begin(); it != end; ++it) {
(*it)->visit(segment, visitor, boost);
}
}

void prepare(const PrepareContext& ctx, ScoreMergeType merge_type,
std::span<const filter* const> incl,
std::span<const filter* const> excl) {
BooleanQuery::queries_t queries{{ctx.memory}};
queries.reserve(incl.size() + excl.size());

// apply boost to the current node
this->boost(ctx.boost);

// prepare included
for (const auto* filter : incl) {
queries.emplace_back(filter->prepare(ctx));
}

// prepare excluded
for (const auto* filter : excl) {
// exclusion part does not affect scoring at all
queries.emplace_back(filter->prepare({
.index = ctx.index,
.memory = ctx.memory,
.ctx = ctx.ctx,
}));
}

// nothrow block
queries_ = std::move(queries);
excl_ = incl.size();
merge_type_ = merge_type;
}

iterator begin() const { return queries_.begin(); }
iterator excl_begin() const { return begin() + excl_; }
iterator end() const { return queries_.end(); }

bool empty() const { return queries_.empty(); }
size_t size() const { return queries_.size(); }

protected:
virtual doc_iterator::ptr execute(const ExecutionContext& ctx, iterator begin,
iterator end) const = 0;

ScoreMergeType merge_type() const noexcept { return merge_type_; }

private:
// 0..excl_-1 - included queries
// excl_..queries.end() - excluded queries
queries_t queries_;
// index of the first excluded query
size_t excl_;
ScoreMergeType merge_type_{ScoreMergeType::kSum};
};

// Represents a set of queries joined by "And": the included sub-queries are
// combined into a conjunction.
class AndQuery : public BooleanQuery {
 public:
  doc_iterator::ptr execute(const ExecutionContext& ctx, iterator first,
                            iterator last) const final {
    return ::make_conjunction(ctx, merge_type(), first, last);
  }
};

// Represents a set of queries joined by "Or": the included sub-queries are
// combined into a disjunction.
class OrQuery : public BooleanQuery {
 public:
  doc_iterator::ptr execute(const ExecutionContext& ctx, iterator first,
                            iterator last) const final {
    return ::make_disjunction(ctx, merge_type(), first, last);
  }
};

// Represents a set of queries joined by "Or" where a document has to satisfy
// at least the specified minimum number of clauses.
class MinMatchQuery : public BooleanQuery {
 public:
  explicit MinMatchQuery(size_t min_match_count) noexcept
    : min_match_count_{min_match_count} {
    IRS_ASSERT(min_match_count_ > 1);
  }

  doc_iterator::ptr execute(const ExecutionContext& ctx, iterator begin,
                            iterator end) const final {
    const auto distance = std::distance(begin, end);
    IRS_ASSERT(distance >= 0);
    const auto total = static_cast<size_t>(distance);

    // never require fewer than one matching clause
    size_t required = std::max(size_t{1}, min_match_count_);

    // decide as much as possible before executing anything
    if (total == 0 || required > total) {
      // empty or unreachable search criteria
      return doc_iterator::empty();
    }
    if (required == total) {
      // every clause has to match: pure conjunction
      return ::make_conjunction(ctx, merge_type(), begin, end);
    }

    // From here on `required < total`, so no upper clamp is needed.

    // Exhausted operands are dropped, so the collection may end up empty
    auto adapters = MakeScoreAdapters<false>(ctx, begin, end);
    if (adapters.empty()) {
      return doc_iterator::empty();
    }

    const size_t num_buckets = ctx.scorers.buckets().size();

    return ResoveMergeType(
      merge_type(), num_buckets,
      [&]<typename Aggregator>(Aggregator&& aggregator) -> doc_iterator::ptr {
        // FIXME(gnusi): use FAST version
        using MinMatch = min_match_iterator<doc_iterator::ptr, Aggregator>;

        return MakeWeakDisjunction<MinMatch, Aggregator>(
          ctx.wand, std::move(adapters), required, std::move(aggregator));
      });
  }

 private:
  // minimum number of clauses a document has to satisfy
  size_t min_match_count_;
};

size_t boolean_filter::hash() const noexcept {
size_t seed = 0;

Expand Down
Loading