Skip to content
This repository has been archived by the owner on May 3, 2024. It is now read-only.

Commit

Permalink
Make filter/prepared smaller, important for removes
Browse files Browse the repository at this point in the history
  • Loading branch information
MBkkt committed Dec 23, 2023
1 parent bd5334e commit 1c70c6b
Show file tree
Hide file tree
Showing 25 changed files with 159 additions and 132 deletions.
19 changes: 6 additions & 13 deletions core/analysis/pipeline_token_stream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,22 +70,15 @@ class pipeline_token_stream final : public TypedAnalyzer<pipeline_token_stream>,
template<typename Visitor>
bool visit_members(Visitor&& visitor) const {
for (const auto& sub : pipeline_) {
if (sub.get_stream().type() ==
type()) { // pipe inside pipe - forward visiting
#if IRESEARCH_DEBUG
const auto& sub_pipe =
dynamic_cast<const pipeline_token_stream&>(sub.get_stream());
#else
const auto& sub_pipe =
static_cast<const pipeline_token_stream&>(sub.get_stream());
#endif
const auto& stream = sub.get_stream();
if (stream.type() == type()) {
// pipe inside pipe - forward visiting
const auto& sub_pipe = DownCast<pipeline_token_stream>(stream);
if (!sub_pipe.visit_members(visitor)) {
return false;
}
} else {
if (!visitor(sub.get_stream())) {
return false;
}
} else if (!visitor(sub.get_stream())) {
return false;
}
}
return true;
Expand Down
2 changes: 1 addition & 1 deletion core/search/all_docs_provider.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

namespace irs {

filter::ptr AllDocsProvider::Default(score_t boost) {
FilterWithBoost::Ptr AllDocsProvider::Default(score_t boost) {
auto filter = std::make_unique<all>();
filter->boost(boost);
return filter;
Expand Down
6 changes: 3 additions & 3 deletions core/search/all_docs_provider.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ namespace irs {

class AllDocsProvider {
public:
using ProviderFunc = std::function<filter::ptr(score_t)>;
using ProviderFunc = std::function<FilterWithBoost::Ptr(score_t)>;

static filter::ptr Default(score_t boost);
static FilterWithBoost::Ptr Default(score_t boost);

filter::ptr MakeAllDocsFilter(score_t boost) const {
FilterWithBoost::Ptr MakeAllDocsFilter(score_t boost) const {
return all_docs_(boost);
}

Expand Down
7 changes: 5 additions & 2 deletions core/search/all_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,24 @@ namespace irs {
class all_query : public filter::prepared {
public:
explicit all_query(bstring&& stats, score_t boost)
: filter::prepared(boost), stats_(std::move(stats)) {}
: stats_{std::move(stats)}, boost_{boost} {}

doc_iterator::ptr execute(const ExecutionContext& ctx) const final {
auto& rdr = ctx.segment;

return memory::make_managed<AllIterator>(rdr, stats_.c_str(), ctx.scorers,
rdr.docs_count(), boost());
rdr.docs_count(), boost_);
}

void visit(const SubReader&, PreparedStateVisitor&, score_t) const final {
// No terms to visit
}

score_t boost() const noexcept final { return boost_; }

private:
bstring stats_;
score_t boost_;
};

filter::prepared::ptr all::prepare(const PrepareContext& ctx) const {
Expand Down
2 changes: 1 addition & 1 deletion core/search/all_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
namespace irs {

// Filter returning all documents
class all : public filter {
class all : public FilterWithBoost {
public:
filter::prepared::ptr prepare(const PrepareContext& ctx) const final;

Expand Down
52 changes: 23 additions & 29 deletions core/search/boolean_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,35 +29,29 @@
#include "prepared_state_visitor.hpp"
#include "search/boolean_query.hpp"

namespace irs {
namespace {

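// first - pointer to the innermost filter that is not itself a "Not" node
// second - collapsed negation mark: true if the chain of nested "Not" nodes
// nets out to a negation, false if the negations cancel each other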
std::pair<const irs::filter*, bool> optimize_not(const irs::Not& node) {
std::pair<const filter*, bool> optimize_not(const Not& node) {
bool neg = true;
const irs::filter* inner = node.filter();
while (inner && inner->type() == irs::type<irs::Not>::id()) {
const auto* inner = node.filter();
while (inner != nullptr && inner->type() == type<Not>::id()) {
neg = !neg;
inner = static_cast<const irs::Not*>(inner)->filter();
inner = DownCast<Not>(inner)->filter();
}

return std::make_pair(inner, neg);
return std::pair{inner, neg};
}

} // namespace

namespace irs {

bool boolean_filter::equals(const filter& rhs) const noexcept {
if (!filter::equals(rhs)) {
return false;
}
const auto& typed_rhs = DownCast<boolean_filter>(rhs);
return filters_.size() == typed_rhs.size() &&
std::equal(begin(), end(), typed_rhs.begin(),
[](const filter::ptr& lhs, const filter::ptr& rhs) {
return *lhs == *rhs;
});
return std::equal(
begin(), end(), typed_rhs.begin(), typed_rhs.end(),
[](const auto& lhs, const auto& rhs) { return *lhs == *rhs; });
}

filter::prepared::ptr boolean_filter::prepare(const PrepareContext& ctx) const {
Expand All @@ -81,8 +75,8 @@ filter::prepared::ptr boolean_filter::prepare(const PrepareContext& ctx) const {
std::vector<const filter*> incl;
std::vector<const filter*> excl;

irs::filter::ptr all_docs_zero_boost;
irs::filter::ptr all_docs_no_boost;
FilterWithBoost::Ptr all_docs_zero_boost;
FilterWithBoost::Ptr all_docs_no_boost;

group_filters(all_docs_zero_boost, incl, excl);

Expand All @@ -95,29 +89,29 @@ filter::prepared::ptr boolean_filter::prepare(const PrepareContext& ctx) const {
return PrepareBoolean(incl, excl, ctx);
}

void boolean_filter::group_filters(filter::ptr& all_docs_zero_boost,
void boolean_filter::group_filters(FilterWithBoost::Ptr& all_docs_zero_boost,
std::vector<const filter*>& incl,
std::vector<const filter*>& excl) const {
incl.reserve(size() / 2);
excl.reserve(incl.capacity());

const irs::filter* empty_filter{nullptr};
const filter* empty_filter = nullptr;
const auto is_or = type() == irs::type<Or>::id();
for (auto begin = this->begin(), end = this->end(); begin != end; ++begin) {
if (irs::type<irs::empty>::id() == (*begin)->type()) {
empty_filter = begin->get();
for (const auto& filter : *this) {
if (irs::type<irs::empty>::id() == filter->type()) {
empty_filter = filter.get();
continue;
}
if (irs::type<Not>::id() == (*begin)->type()) {
const auto res = optimize_not(DownCast<Not>(**begin));
if (irs::type<Not>::id() == filter->type()) {
const auto res = optimize_not(DownCast<Not>(*filter));

if (!res.first) {
continue;
}

if (res.second) {
if (!all_docs_zero_boost) {
all_docs_zero_boost = MakeAllDocsFilter(0.f);
all_docs_zero_boost = MakeAllDocsFilter(0.F);
}

if (*all_docs_zero_boost == *res.first) {
Expand All @@ -135,7 +129,7 @@ void boolean_filter::group_filters(filter::ptr& all_docs_zero_boost,
incl.push_back(res.first);
}
} else {
incl.push_back(begin->get());
incl.push_back(filter.get());
}
}
if (empty_filter != nullptr) {
Expand Down Expand Up @@ -167,7 +161,7 @@ filter::prepared::ptr And::PrepareBoolean(std::vector<const filter*>& incl,
for (auto filter : incl) {
if (*filter == *cumulative_all) {
all_count++;
all_boost += filter->boost();
all_boost += DownCast<FilterWithBoost>(*filter).boost();
}
}
if (all_count != 0) {
Expand All @@ -191,7 +185,7 @@ filter::prepared::ptr And::PrepareBoolean(std::vector<const filter*>& incl,
// resulting boost will be: new_boost * OR_BOOST * LEFT_BOOST. If we
// substitute new_boost back we will get ( boost * OR_BOOST * ALL_BOOST +
// boost * OR_BOOST * LEFT_BOOST) - original non-optimized boost value
auto left_boost = (*incl.begin())->boost();
auto left_boost = (*incl.begin())->BoostImpl();
if (boost() != 0 && left_boost != 0 && !sub_ctx.scorers.empty()) {
sub_ctx.boost = (sub_ctx.boost * boost() * all_boost +
sub_ctx.boost * boost() * left_boost) /
Expand Down Expand Up @@ -257,7 +251,7 @@ filter::prepared::ptr Or::PrepareBoolean(std::vector<const filter*>& incl,
for (auto filter : incl) {
if (*filter == *cumulative_all) {
all_count++;
all_boost += filter->boost();
all_boost += DownCast<FilterWithBoost>(*filter).boost();
incl_all = filter;
}
}
Expand Down
46 changes: 21 additions & 25 deletions core/search/boolean_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
namespace irs {

// Represents user-side boolean filter as the container for other filters
class boolean_filter : public filter, public AllDocsProvider {
class boolean_filter : public FilterWithBoost, public AllDocsProvider {
public:
auto begin() const { return filters_.begin(); }
auto end() const { return filters_.end(); }
Expand Down Expand Up @@ -59,17 +59,17 @@ class boolean_filter : public filter, public AllDocsProvider {
bool empty() const { return filters_.empty(); }
size_t size() const { return filters_.size(); }

filter::prepared::ptr prepare(const PrepareContext& ctx) const override;
prepared::ptr prepare(const PrepareContext& ctx) const override;

protected:
bool equals(const filter& rhs) const noexcept final;

virtual filter::prepared::ptr PrepareBoolean(
std::vector<const filter*>& incl, std::vector<const filter*>& excl,
const PrepareContext& ctx) const = 0;
virtual prepared::ptr PrepareBoolean(std::vector<const filter*>& incl,
std::vector<const filter*>& excl,
const PrepareContext& ctx) const = 0;

private:
void group_filters(filter::ptr& all_docs_no_boost,
void group_filters(FilterWithBoost::Ptr& all_docs_zero_boost,
std::vector<const filter*>& incl,
std::vector<const filter*>& excl) const;

Expand All @@ -85,9 +85,9 @@ class And final : public boolean_filter {
}

protected:
filter::prepared::ptr PrepareBoolean(std::vector<const filter*>& incl,
std::vector<const filter*>& excl,
const PrepareContext& ctx) const final;
prepared::ptr PrepareBoolean(std::vector<const filter*>& incl,
std::vector<const filter*>& excl,
const PrepareContext& ctx) const final;
};

// Represents disjunction
Expand All @@ -102,45 +102,41 @@ class Or final : public boolean_filter {
return *this;
}

filter::prepared::ptr prepare(const PrepareContext& ctx) const final;
prepared::ptr prepare(const PrepareContext& ctx) const final;

type_info::type_id type() const noexcept final { return irs::type<Or>::id(); }

protected:
filter::prepared::ptr PrepareBoolean(std::vector<const filter*>& incl,
std::vector<const filter*>& excl,
const PrepareContext& ctx) const final;
prepared::ptr PrepareBoolean(std::vector<const filter*>& incl,
std::vector<const filter*>& excl,
const PrepareContext& ctx) const final;

private:
size_t min_match_count_{1};
};

// Represents negation
class Not : public filter, public AllDocsProvider {
class Not : public FilterWithBoost, public AllDocsProvider {
public:
const irs::filter* filter() const { return filter_.get(); }
const filter* filter() const { return filter_.get(); }

template<typename T>
const T* filter() const {
using type =
typename std::enable_if_t<std::is_base_of_v<irs::filter, T>, T>;

return static_cast<const type*>(filter_.get());
static_assert(std::is_base_of_v<irs::filter, T>);
return static_cast<const T*>(filter_.get());
}

template<typename T, typename... Args>
T& filter(Args&&... args) {
using type =
typename std::enable_if_t<std::is_base_of_v<irs::filter, T>, T>;

filter_ = std::make_unique<type>(std::forward<Args>(args)...);
return static_cast<type&>(*filter_);
static_assert(std::is_base_of_v<irs::filter, T>);
filter_ = std::make_unique<T>(std::forward<Args>(args)...);
return static_cast<T&>(*filter_);
}

void clear() { filter_.reset(); }
bool empty() const { return nullptr == filter_; }

filter::prepared::ptr prepare(const PrepareContext& ctx) const final;
prepared::ptr prepare(const PrepareContext& ctx) const final;

type_info::type_id type() const noexcept final {
return irs::type<Not>::id();
Expand Down
9 changes: 5 additions & 4 deletions core/search/boolean_query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ doc_iterator::ptr BooleanQuery::execute(const ExecutionContext& ctx) const {
// exclusion part does not affect scoring at all
auto excl = make_disjunction(
{.segment = ctx.segment, .scorers = Scorers::kUnordered, .ctx = ctx.ctx},
irs::ScoreMergeType::kSum, excl_begin, end);
irs::ScoreMergeType::kNoop, excl_begin, end);

// got empty iterator for excluded
if (doc_limits::eof(excl->value())) {
Expand All @@ -156,7 +156,7 @@ doc_iterator::ptr BooleanQuery::execute(const ExecutionContext& ctx) const {
void BooleanQuery::visit(const irs::SubReader& segment,
irs::PreparedStateVisitor& visitor,
score_t boost) const {
boost *= this->boost();
boost *= boost_;

if (!visitor.Visit(*this, boost)) {
return;
Expand All @@ -167,10 +167,11 @@ void BooleanQuery::visit(const irs::SubReader& segment,
(*it)->visit(segment, visitor, boost);
}
}

void BooleanQuery::prepare(const PrepareContext& ctx, ScoreMergeType merge_type,
queries_t queries, size_t exclude_start) {
// apply boost to the current node
this->boost(ctx.boost);
boost_ *= ctx.boost;
// nothrow block
queries_ = std::move(queries);
excl_ = exclude_start;
Expand Down Expand Up @@ -214,7 +215,7 @@ doc_iterator::ptr MinMatchQuery::execute(const ExecutionContext& ctx,
const auto size = size_t(std::distance(begin, end));

// 1 <= min_match_count
size_t min_match_count = std::max(size_t(1), min_match_count_);
size_t min_match_count = std::max(size_t{1}, min_match_count_);

// check the size before the execution
if (0 == size || min_match_count > size) {
Expand Down
7 changes: 5 additions & 2 deletions core/search/boolean_query.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ class BooleanQuery : public filter::prepared {
void visit(const irs::SubReader& segment, irs::PreparedStateVisitor& visitor,
score_t boost) const final;

score_t boost() const noexcept final { return boost_; }

void prepare(const PrepareContext& ctx, ScoreMergeType merge_type,
queries_t queries, size_t exclude_start);

Expand All @@ -65,8 +67,9 @@ class BooleanQuery : public filter::prepared {
// excl_..queries.end() - excluded queries
queries_t queries_;
// index of the first excluded query
size_t excl_{0};
ScoreMergeType merge_type_{ScoreMergeType::kSum};
size_t excl_ = 0;
ScoreMergeType merge_type_ = ScoreMergeType::kSum;
score_t boost_ = kNoBoost;
};

// Represents a set of queries joined by "And"
Expand Down
Loading

0 comments on commit 1c70c6b

Please sign in to comment.