Skip to content
This repository has been archived by the owner on May 3, 2024. It is now read-only.

Commit

Permalink
Some improvements (#559)
Browse files Browse the repository at this point in the history
  • Loading branch information
MBkkt authored Sep 14, 2023
1 parent 64e5b52 commit add15fb
Show file tree
Hide file tree
Showing 45 changed files with 284 additions and 300 deletions.
18 changes: 6 additions & 12 deletions core/analysis/minhash_token_stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ namespace {
// That is important because produced tokens are stored in the index as terms.

// Some primes between 2^63 and 2^64 for various uses.
static const uint64_t k0 = 0xc3a5c85c97cb3127ULL;
static const uint64_t k1 = 0xb492b66fbe98f273ULL;
static const uint64_t k2 = 0x9ae16a3b2f90404fULL;
static constexpr uint64_t k0 = 0xc3a5c85c97cb3127ULL;
static constexpr uint64_t k1 = 0xb492b66fbe98f273ULL;
static constexpr uint64_t k2 = 0x9ae16a3b2f90404fULL;

#ifdef ABSL_IS_BIG_ENDIAN
#define uint32_in_expected_order(x) (absl::gbswap_32(x))
Expand Down Expand Up @@ -467,17 +467,11 @@ bool MinHashTokenStream::next() {
return false;
}

const size_t value = [value = *begin_]() noexcept -> size_t {
if constexpr (is_big_endian()) {
return absl::gbswap_64(value);
} else {
return value;
}
}();
const auto value = absl::little_endian::FromHost(*begin_);

[[maybe_unused]] const size_t length =
absl::strings_internal::Base64EscapeInternal(
reinterpret_cast<const byte_type*>(&value), sizeof value, buf_.data(),
reinterpret_cast<const uint8_t*>(&value), sizeof value, buf_.data(),
buf_.size(), absl::strings_internal::kBase64Chars, false);
IRS_ASSERT(length == buf_.size());

Expand Down Expand Up @@ -510,7 +504,7 @@ void MinHashTokenStream::ComputeSignature() {

do {
const std::string_view value = ViewCast<char>(term_->value);
const size_t hash_value = ::CityHash64(value.data(), value.size());
const auto hash_value = ::CityHash64(value.data(), value.size());

minhash_.Insert(hash_value);
end = offs->end;
Expand Down
2 changes: 1 addition & 1 deletion core/analysis/minhash_token_stream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class MinHashTokenStream final : public TypedAnalyzer<MinHashTokenStream>,

private:
using attributes = std::tuple<term_attribute, increment, offset>;
using iterator = std::vector<size_t>::const_iterator;
using iterator = std::vector<uint64_t>::const_iterator;

void ComputeSignature();

Expand Down
4 changes: 2 additions & 2 deletions core/error/error.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ enum class ErrorCode : uint32_t {
undefined_error
};

#define DECLARE_ERROR_CODE(class_name) \
static const ErrorCode CODE = ErrorCode::class_name; \
#define DECLARE_ERROR_CODE(class_name) \
static constexpr ErrorCode CODE = ErrorCode::class_name; \
::irs::ErrorCode code() const noexcept final { return CODE; }

//////////////////////////////////////////////////////////////////////////////
Expand Down
2 changes: 1 addition & 1 deletion core/formats/columnstore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ namespace columnstore {
template<size_t Size>
class index_block {
public:
static const size_t SIZE = Size;
static constexpr size_t SIZE = Size;

void push_back(doc_id_t key, uint64_t offset) noexcept {
IRS_ASSERT(key_ >= keys_);
Expand Down
2 changes: 1 addition & 1 deletion core/formats/columnstore2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1705,7 +1705,7 @@ const column_header* reader::header(field_id field) const {
: columns_[field];

if (column) {
return &down_cast<column_base>(*column).header();
return &DownCast<column_base>(*column).header();
}

return nullptr;
Expand Down
10 changes: 5 additions & 5 deletions core/formats/formats.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,8 @@ struct field_writer {
};

struct WandInfo {
byte_type mapped_index{WandContext::kDisable};
byte_type count{0};
uint8_t mapped_index{WandContext::kDisable};
uint8_t count{0};
};

struct postings_reader {
Expand All @@ -171,7 +171,7 @@ struct postings_reader {
virtual doc_iterator::ptr iterator(IndexFeatures field_features,
IndexFeatures required_features,
const term_meta& meta,
byte_type wand_count) = 0;
uint8_t wand_count) = 0;

virtual doc_iterator::ptr wanderator(IndexFeatures field_features,
IndexFeatures required_features,
Expand All @@ -187,7 +187,7 @@ struct postings_reader {
// This API is experimental.
virtual size_t bit_union(IndexFeatures field_features,
const term_provider_f& provider, size_t* set,
byte_type wand_count) = 0;
uint8_t wand_count) = 0;
};

// Expected usage pattern of seek_term_iterator
Expand Down Expand Up @@ -252,7 +252,7 @@ struct term_reader : public attribute_provider {
virtual bytes_view(max)() const = 0;

// Returns true if the scorer denoted by the given index is supported by the field.
virtual bool has_scorer(byte_type index) const = 0;
virtual bool has_scorer(uint8_t index) const = 0;
};

struct field_reader {
Expand Down
48 changes: 24 additions & 24 deletions core/formats/formats_10.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,7 @@ void postings_writer_base::EndTerm(version10::term_meta& meta) {
const bool has_skip_list = skip_.Skip0() < meta.docs_count;
auto write_max_score = [&](size_t level) {
ApplyWriters([&](auto& writer) {
const byte_type size = writer.SizeRoot(level);
const uint8_t size = writer.SizeRoot(level);
doc_out_->write_byte(size);
});
ApplyWriters([&](auto& writer) { writer.WriteRoot(level, *doc_out_); });
Expand Down Expand Up @@ -1000,7 +1000,7 @@ void postings_writer<FormatTraits>::write(irs::doc_iterator& docs,
// FIXME(gnusi): optimize for 1 writer case? compile? maybe just 1
// composite wand writer?
ApplyWriters([&](auto& writer) {
const byte_type size = writer.Size(level);
const uint8_t size = writer.Size(level);
IRS_ASSERT(size <= irs::WandWriter::kMaxSize);
out.write_byte(size);
});
Expand Down Expand Up @@ -1928,24 +1928,24 @@ void single_doc_iterator<IteratorTraits, FieldTraits>::prepare(

static_assert(kMaxScorers < WandContext::kDisable);

template<byte_type Value>
template<uint8_t Value>
struct Extent {
static constexpr byte_type GetExtent() noexcept { return Value; }
static constexpr uint8_t GetExtent() noexcept { return Value; }
};

template<>
struct Extent<WandContext::kDisable> {
Extent(byte_type value) noexcept : value{value} {}
Extent(uint8_t value) noexcept : value{value} {}

constexpr byte_type GetExtent() const noexcept { return value; }
constexpr uint8_t GetExtent() const noexcept { return value; }

byte_type value;
uint8_t value;
};

using DynamicExtent = Extent<WandContext::kDisable>;

template<byte_type PossibleMin, typename Func>
auto ResolveExtent(byte_type extent, Func&& func) {
template<uint8_t PossibleMin, typename Func>
auto ResolveExtent(uint8_t extent, Func&& func) {
if constexpr (PossibleMin == WandContext::kDisable) {
return std::forward<Func>(func)(Extent<0>{});
} else {
Expand Down Expand Up @@ -1985,7 +1985,7 @@ void CommonSkipWandData(WandExtent extent, index_input& in) {
}

template<typename WandExtent>
void CommonReadWandData(WandExtent wextent, byte_type index,
void CommonReadWandData(WandExtent wextent, uint8_t index,
const ScoreFunction& func, WandSource& ctx,
index_input& in, score_t& score) {
const auto extent = wextent.GetExtent();
Expand All @@ -1998,7 +1998,7 @@ void CommonReadWandData(WandExtent wextent, byte_type index,
return;
}

byte_type i = 0;
uint8_t i = 0;
uint64_t scorer_offset = 0;
for (; i < index; ++i) {
scorer_offset += in.read_byte();
Expand Down Expand Up @@ -2043,7 +2043,7 @@ class doc_iterator : public doc_iterator_base<IteratorTraits, FieldTraits> {
void WandPrepare(const term_meta& meta, const index_input* doc_in,
const index_input* pos_in, const index_input* pay_in,
const ScoreFunctionFactory& factory, const Scorer& scorer,
byte_type wand_index) {
uint8_t wand_index) {
prepare(meta, doc_in, pos_in, pay_in, wand_index);
if (meta.docs_count > FieldTraits::block_size()) {
return;
Expand Down Expand Up @@ -2073,7 +2073,7 @@ class doc_iterator : public doc_iterator_base<IteratorTraits, FieldTraits> {
void prepare(const term_meta& meta, const index_input* doc_in,
[[maybe_unused]] const index_input* pos_in,
[[maybe_unused]] const index_input* pay_in,
byte_type wand_index = WandContext::kDisable);
uint8_t wand_index = WandContext::kDisable);

private:
attribute* get_mutable(irs::type_info::type_id type) noexcept final {
Expand Down Expand Up @@ -2138,8 +2138,8 @@ class doc_iterator : public doc_iterator_base<IteratorTraits, FieldTraits> {
Disable(); // Prevent using skip-list by default
}

void ReadMaxScore(byte_type index, const ScoreFunction& func,
WandSource& ctx, index_input& in, score_t& score) {
void ReadMaxScore(uint8_t index, const ScoreFunction& func, WandSource& ctx,
index_input& in, score_t& score) {
CommonReadWandData(static_cast<WandExtent>(*this), index, func, ctx, in,
score);
}
Expand Down Expand Up @@ -2246,7 +2246,7 @@ template<typename IteratorTraits, typename FieldTraits, typename WandExtent>
void doc_iterator<IteratorTraits, FieldTraits, WandExtent>::prepare(
const term_meta& meta, const index_input* doc_in,
[[maybe_unused]] const index_input* pos_in,
[[maybe_unused]] const index_input* pay_in, byte_type wand_index) {
[[maybe_unused]] const index_input* pay_in, uint8_t wand_index) {
// Don't use doc_iterator for singleton docs, must be ensured by the caller
IRS_ASSERT(meta.docs_count > 1);
IRS_ASSERT(this->begin_ == std::end(this->buf_.docs));
Expand Down Expand Up @@ -2452,7 +2452,7 @@ class wanderator : public doc_iterator_base<IteratorTraits, FieldTraits>,
using ptr = memory::managed_ptr<wanderator>;

wanderator(const ScoreFunctionFactory& factory, const Scorer& scorer,
WandExtent extent, byte_type index, bool strict)
WandExtent extent, uint8_t index, bool strict)
: skip_{IteratorTraits::block_size(), postings_writer_base::kSkipN,
ReadSkip{factory, scorer, index, extent}},
scorer_{factory(*this)} {
Expand Down Expand Up @@ -2503,7 +2503,7 @@ class wanderator : public doc_iterator_base<IteratorTraits, FieldTraits>,
class ReadSkip {
public:
ReadSkip(const ScoreFunctionFactory& factory, const Scorer& scorer,
byte_type index, WandExtent extent)
uint8_t index, WandExtent extent)
: ctx_{scorer.prepare_wand_source()},
func_{factory(*ctx_)},
index_{index},
Expand Down Expand Up @@ -2551,7 +2551,7 @@ class wanderator : public doc_iterator_base<IteratorTraits, FieldTraits>,
std::vector<score_t> skip_scores_;
SkipState prev_skip_; // skip context used by skip reader
score_t threshold_{};
byte_type index_;
uint8_t index_;
IRS_NO_UNIQUE_ADDRESS WandExtent extent_;
};

Expand Down Expand Up @@ -2903,7 +2903,7 @@ bool IndexMetaWriter::prepare(directory& dir, IndexMeta& meta,

if (version_ > kFormatMin) {
const auto payload = GetPayload(meta);
const byte_type flags = IsNull(payload) ? 0 : kHasPayload;
const uint8_t flags = IsNull(payload) ? 0 : kHasPayload;
out->write_byte(flags);

if (flags == kHasPayload) {
Expand Down Expand Up @@ -3121,7 +3121,7 @@ void SegmentMetaWriter::write(directory& dir, std::string& meta_file,
throw io_error{absl::StrCat("failed to create file, path: ", meta_file)};
}

byte_type flags = meta.column_store ? HAS_COLUMN_STORE : 0;
uint8_t flags = meta.column_store ? HAS_COLUMN_STORE : 0;

format_utils::write_header(*out, FORMAT_NAME, version_);
write_string(*out, meta.name);
Expand Down Expand Up @@ -3498,7 +3498,7 @@ class postings_reader final : public postings_reader_base {
irs::doc_iterator::ptr iterator(IndexFeatures field_features,
IndexFeatures required_features,
const term_meta& meta,
byte_type wand_count) final {
uint8_t wand_count) final {
if (meta.docs_count == 0) {
IRS_ASSERT(false);
return irs::doc_iterator::empty();
Expand Down Expand Up @@ -3540,7 +3540,7 @@ class postings_reader final : public postings_reader_base {
}

size_t bit_union(IndexFeatures field, const term_provider_f& provider,
size_t* set, byte_type wand_count) final;
size_t* set, uint8_t wand_count) final;

private:
irs::doc_iterator::ptr MakeWanderator(IndexFeatures field_features,
Expand Down Expand Up @@ -3760,7 +3760,7 @@ void bit_union(index_input& doc_in, doc_id_t docs_count, uint32_t (&docs)[N],
template<typename FormatTraits>
size_t postings_reader<FormatTraits>::bit_union(
const IndexFeatures field_features, const term_provider_f& provider,
size_t* set, byte_type wand_count) {
size_t* set, uint8_t wand_count) {
constexpr auto BITS{bits_required<std::remove_pointer_t<decltype(set)>>()};
uint32_t enc_buf[FormatTraits::block_size()];
uint32_t docs[FormatTraits::block_size()];
Expand Down
Loading

0 comments on commit add15fb

Please sign in to comment.