Skip to content
This repository has been archived by the owner on May 3, 2024. It is now read-only.

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
MBkkt committed Dec 8, 2023
1 parent c982588 commit 187e73e
Show file tree
Hide file tree
Showing 8 changed files with 42 additions and 40 deletions.
3 changes: 0 additions & 3 deletions core/search/all_docs_provider.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,6 @@ class AllDocsProvider {
all_docs_ = provider ? std::move(provider) : ProviderFunc{&Default};
}

protected:
~AllDocsProvider() = default;

private:
ProviderFunc all_docs_{&Default};
};
Expand Down
2 changes: 0 additions & 2 deletions core/search/levenshtein_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,6 @@ struct by_edit_distance_options : by_edit_distance_filter_options {
////////////////////////////////////////////////////////////////////////////////
class by_edit_distance final : public filter_base<by_edit_distance_options> {
public:
static ptr make();

static prepared::ptr prepare(const PrepareContext& ctx,
std::string_view field, bytes_view term,
size_t terms_limit, uint8_t max_distance,
Expand Down
8 changes: 3 additions & 5 deletions core/search/ngram_similarity_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,11 @@ filter::prepared::ptr by_ngram_similarity::Prepare(
std::clamp(static_cast<size_t>(std::ceil(terms_count * threshold)),
size_t{1}, terms_count);
if (ctx.scorers.empty() && 1 == min_match_count) {
irs::by_terms disj;
auto& terms = disj.mutable_options()->terms;
irs::by_terms_options options;
for (const auto& term : ngrams) {
terms.emplace(term, irs::kNoBoost);
options.terms.emplace(term, irs::kNoBoost);
}
*disj.mutable_field() = field_name;
return disj.prepare(ctx);
return by_terms::Prepare(ctx, field_name, options);
}

if (allow_phrase && min_match_count == terms_count) {
Expand Down
4 changes: 2 additions & 2 deletions core/search/phrase_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ struct PrepareVisitor : util::noncopyable {
part.with_transpositions, part.prefix);
}

result_type operator()(const by_terms_options& /*part*/) const {
return nullptr; // FIXME
result_type operator()(const by_terms_options& part) const {
return by_terms::Prepare(ctx, field, part);
}

result_type operator()(const by_range_options& part) const {
Expand Down
8 changes: 6 additions & 2 deletions core/search/term_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,16 @@ filter::prepared::ptr by_term::prepare(const PrepareContext& ctx,
continue;
}

field_stats.collect(segment,
*reader); // collect field statistics once per segment
field_stats.collect(segment, *reader);
// collect field statistics once per segment

VisitImpl(segment, *reader, term, visitor);
}

if (states.empty()) {
return prepared::empty();
}

bstring stats(ctx.scorers.stats_size(), 0);
auto* stats_buf = stats.data();

Expand Down
44 changes: 23 additions & 21 deletions core/search/terms_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,11 @@ void by_terms::visit(const SubReader& segment, const term_reader& field,
VisitImpl(segment, field, terms, visitor);
}

filter::prepared::ptr by_terms::prepare(const PrepareContext& ctx) const {
const auto& [terms, min_match, merge_type] = options();
filter::prepared::ptr by_terms::Prepare(const PrepareContext& ctx,
std::string_view field,
const by_terms_options& options,
const AllDocsProvider& provider) {
const auto& [terms, min_match, merge_type] = options;
const size_t size = terms.size();

if (0 == size || min_match > size) {
Expand All @@ -115,39 +118,38 @@ filter::prepared::ptr by_terms::prepare(const PrepareContext& ctx) const {

if (0 == min_match) {
if (ctx.scorers.empty()) {
return MakeAllDocsFilter(kNoBoost)->prepare({
return provider.MakeAllDocsFilter(kNoBoost)->prepare({
.index = ctx.index,
.memory = ctx.memory,
});
} else {
Or disj;
// Don't contribute to the score
disj.add(MakeAllDocsFilter(0.));
// Reset min_match to 1
disj.add<by_terms>(*this).mutable_options()->min_match = 1;
return disj.prepare({
.index = ctx.index,
.memory = ctx.memory,
.scorers = ctx.scorers,
.ctx = ctx.ctx,
});
}
Or disj;
// Don't contribute to the score
disj.add(provider.MakeAllDocsFilter(0.));
// Reset min_match to 1
auto& new_options = *disj.add<by_terms>().mutable_options();
new_options = options;
new_options.min_match = 1;
return disj.prepare({
.index = ctx.index,
.memory = ctx.memory,
.scorers = ctx.scorers,
.ctx = ctx.ctx,
});
}

const auto sub_boost = ctx.boost * boost();

if (1 == size) {
const auto term = std::begin(terms);
auto sub_ctx = ctx;
sub_ctx.boost = sub_boost * term->boost;
return by_term::prepare(sub_ctx, field(), term->term);
sub_ctx.boost = ctx.boost * term->boost;
return by_term::prepare(sub_ctx, field, term->term);
}

field_collectors field_stats{ctx.scorers};
term_collectors term_stats{ctx.scorers, size};
MultiTermQuery::States states{ctx.memory, ctx.index.size()};
all_terms_collector collector{states, field_stats, term_stats};
collect_terms(ctx.index, field(), terms, collector);
collect_terms(ctx.index, field, terms, collector);

// FIXME(gnusi): Filter out unmatched states during collection
if (min_match > 1) {
Expand All @@ -169,7 +171,7 @@ filter::prepared::ptr by_terms::prepare(const PrepareContext& ctx) const {
}

return memory::make_tracked<MultiTermQuery>(ctx.memory, std::move(states),
std::move(stats), sub_boost,
std::move(stats), ctx.boost,
merge_type, min_match);
}

Expand Down
11 changes: 8 additions & 3 deletions core/search/terms_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,18 @@ struct by_terms_options {
class by_terms final : public filter_base<by_terms_options>,
public AllDocsProvider {
public:
static ptr make();

static void visit(const SubReader& segment, const term_reader& field,
const by_terms_options::search_terms& terms,
filter_visitor& visitor);

filter::prepared::ptr prepare(const PrepareContext& ctx) const final;
static prepared::ptr Prepare(const PrepareContext& ctx,
std::string_view field,
const by_terms_options& options,
const AllDocsProvider& provider = {});

prepared::ptr prepare(const PrepareContext& ctx) const final {
return Prepare(ctx, field(), options(), *this);
}
};

} // namespace irs
Expand Down
2 changes: 0 additions & 2 deletions core/search/wildcard_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@ struct by_wildcard_options : by_wildcard_filter_options {
// User-side wildcard filter
class by_wildcard final : public filter_base<by_wildcard_options> {
public:
static ptr make();

static prepared::ptr prepare(const PrepareContext& ctx,
std::string_view field, bytes_view term,
size_t scored_terms_limit);
Expand Down

0 comments on commit 187e73e

Please sign in to comment.