Skip to content

Commit

Permalink
Merge pull request ceph#60655 from xxhdx1985126/wip-seastore-move-out…
Browse files Browse the repository at this point in the history
…-root-meta

crimson/os/seastore: move the root meta out of the root block

Reviewed-by: Yingxin Cheng <yingxin.cheng@intel.com>
  • Loading branch information
cyx1231st authored Nov 15, 2024
2 parents 5853cb7 + 1627e38 commit 3a35aeb
Show file tree
Hide file tree
Showing 10 changed files with 234 additions and 119 deletions.
10 changes: 10 additions & 0 deletions src/crimson/os/seastore/cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ void Cache::register_metrics()
{extent_types_t::LADDR_INTERNAL, sm::label_instance("ext", "LADDR_INTERNAL")},
{extent_types_t::LADDR_LEAF, sm::label_instance("ext", "LADDR_LEAF")},
{extent_types_t::DINK_LADDR_LEAF, sm::label_instance("ext", "DINK_LADDR_LEAF")},
{extent_types_t::ROOT_META, sm::label_instance("ext", "ROOT_META")},
{extent_types_t::OMAP_INNER, sm::label_instance("ext", "OMAP_INNER")},
{extent_types_t::OMAP_LEAF, sm::label_instance("ext", "OMAP_LEAF")},
{extent_types_t::ONODE_BLOCK_STAGED, sm::label_instance("ext", "ONODE_BLOCK_STAGED")},
Expand Down Expand Up @@ -1093,6 +1094,9 @@ CachedExtentRef Cache::alloc_new_extent_by_type(
case extent_types_t::LADDR_LEAF:
return alloc_new_non_data_extent<lba_manager::btree::LBALeafNode>(
t, length, hint, gen);
case extent_types_t::ROOT_META:
return alloc_new_non_data_extent<RootMetaBlock>(
t, length, hint, gen);
case extent_types_t::ONODE_BLOCK_STAGED:
return alloc_new_non_data_extent<onode::SeastoreNodeExtent>(
t, length, hint, gen);
Expand Down Expand Up @@ -2193,6 +2197,12 @@ Cache::do_get_caching_extent_by_type(
).safe_then([](auto extent) {
return CachedExtentRef(extent.detach(), false /* add_ref */);
});
case extent_types_t::ROOT_META:
return do_get_caching_extent<RootMetaBlock>(
offset, length, std::move(extent_init_func), std::move(on_cache)
).safe_then([](auto extent) {
return CachedExtentRef(extent.detach(), false /* add_ref */);
});
case extent_types_t::OMAP_INNER:
return do_get_caching_extent<omap_manager::OMapInnerNode>(
offset, length, std::move(extent_init_func), std::move(on_cache)
Expand Down
7 changes: 6 additions & 1 deletion src/crimson/os/seastore/cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -978,7 +978,8 @@ class Cache {
auto result = epm.alloc_new_non_data_extent(t, T::TYPE, length, hint, gen);
#endif
if (!result) {
return nullptr;
SUBERRORT(seastore_cache, "insufficient space", t);
std::rethrow_exception(crimson::ct_error::enospc::exception_ptr());
}
auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result->bp));
ret->init(CachedExtent::extent_state_t::INITIAL_WRITE_PENDING,
Expand Down Expand Up @@ -1019,6 +1020,10 @@ class Cache {
#else
auto results = epm.alloc_new_data_extents(t, T::TYPE, length, hint, gen);
#endif
if (results.empty()) {
SUBERRORT(seastore_cache, "insufficient space", t);
std::rethrow_exception(crimson::ct_error::enospc::exception_ptr());
}
std::vector<TCachedExtentRef<T>> extents;
for (auto &result : results) {
auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result.bp));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,10 @@ BlockRBManager::write_ertr::future<> BlockRBManager::write(
void BlockRBManager::prefill_fragmented_device()
{
LOG_PREFIX(BlockRBManager::prefill_fragmented_device);
// the first 2 blocks must be allocated to lba root
// the first 3 blocks must be allocated to lba root,
// backref root and root meta during mkfs
for (size_t block = get_block_size() * 2;
block <= get_size() - get_block_size() * 2;
for (size_t block = get_block_size() * 3;
block <= get_size() - get_block_size() * 3;
block += get_block_size() * 2) {
DEBUG("marking {}~{} used",
get_start_rbm_addr() + block,
Expand Down
76 changes: 76 additions & 0 deletions src/crimson/os/seastore/root_meta.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#pragma once

#include "crimson/os/seastore/cached_extent.h"

namespace crimson::os::seastore {

/**
 * RootMetaBlock
 *
 * Logical extent that stores a string -> string metadata map. The encoded
 * map occupies the first MAX_META_LENGTH bytes of the extent buffer; deltas
 * always carry that whole region rather than an incremental diff.
 */
struct RootMetaBlock : LogicalCachedExtent {
  using meta_t = std::map<std::string, std::string>;
  using Ref = TCachedExtentRef<RootMetaBlock>;
  /// extent length used when the block is allocated
  static constexpr size_t SIZE = 4096;
  /// upper bound on the encoded size of the meta map, and the delta length
  static constexpr int MAX_META_LENGTH = 1024;
  static_assert(MAX_META_LENGTH > 0 &&
                static_cast<size_t>(MAX_META_LENGTH) <= SIZE,
                "the encoded meta region must fit inside the block");

  explicit RootMetaBlock(ceph::bufferptr &&ptr)
    : LogicalCachedExtent(std::move(ptr)) {}
  explicit RootMetaBlock(extent_len_t length)
    : LogicalCachedExtent(length) {}
  RootMetaBlock(const RootMetaBlock &rhs)
    : LogicalCachedExtent(rhs) {}

  /// returns a copy of this block for the transaction to mutate
  CachedExtentRef duplicate_for_write(Transaction&) final {
    return CachedExtentRef(new RootMetaBlock(*this));
  }

  static constexpr extent_types_t TYPE = extent_types_t::ROOT_META;
  extent_types_t get_type() const final {
    return extent_types_t::ROOT_META;
  }

  /// dumps the whole meta region (first MAX_META_LENGTH bytes) as the delta
  ceph::bufferlist get_delta() final {
    ceph::bufferlist bl;
    ceph::buffer::ptr bptr(get_bptr(), 0, MAX_META_LENGTH);
    bl.append(bptr);
    return bl;
  }

  /// overwrites the meta region with the content of the delta
  void apply_delta(const ceph::bufferlist &_bl) final
  {
    // Checked with ceph_assert (like set_meta) so the guard is not compiled
    // out with NDEBUG: a shorter delta would make the copy below read past
    // the end of the source buffer.
    ceph_assert(_bl.length() == MAX_META_LENGTH);
    ceph::bufferlist bl = _bl;
    // make the delta contiguous so it can be copied from a single pointer
    bl.rebuild();
    get_bptr().copy_in(0, MAX_META_LENGTH, bl.front().c_str());
  }

  /// decodes and returns the meta map stored at the start of the buffer
  meta_t get_meta() const {
    ceph::bufferlist bl;
    bl.append(get_bptr());
    meta_t ret;
    auto iter = bl.cbegin();
    decode(ret, iter);
    return ret;
  }

  /// encodes m into the meta region; asserts that the encoding does not
  /// exceed MAX_META_LENGTH
  void set_meta(const meta_t &m) {
    ceph::bufferlist bl;
    encode(m, bl);
    ceph_assert(bl.length() <= MAX_META_LENGTH);
    bl.rebuild();
    // clear the whole region first so bytes of a longer, older encoding
    // do not linger behind the new one
    get_bptr().zero(0, MAX_META_LENGTH);
    get_bptr().copy_in(0, bl.length(), bl.front().c_str());
  }

};
using RootMetaBlockRef = RootMetaBlock::Ref;

} // crimson::os::seastore


// Makes RootMetaBlock usable as a fmt format argument by deriving its
// formatter from fmt::ostream_formatter. Guarded because ostream_formatter
// is only used here for fmt >= 9 (FMT_VERSION >= 90000).
#if FMT_VERSION >= 90000
template <> struct fmt::formatter<crimson::os::seastore::RootMetaBlock>
: fmt::ostream_formatter {};
#endif
2 changes: 2 additions & 0 deletions src/crimson/os/seastore/seastore_types.cc
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,8 @@ std::ostream &operator<<(std::ostream &out, extent_types_t t)
return out << "LADDR_LEAF";
case extent_types_t::ONODE_BLOCK_STAGED:
return out << "ONODE_BLOCK_STAGED";
case extent_types_t::ROOT_META:
return out << "ROOT_META";
case extent_types_t::OMAP_INNER:
return out << "OMAP_INNER";
case extent_types_t::OMAP_LEAF:
Expand Down
61 changes: 18 additions & 43 deletions src/crimson/os/seastore/seastore_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -1378,23 +1378,24 @@ enum class extent_types_t : uint8_t {
LADDR_INTERNAL = 1,
LADDR_LEAF = 2,
DINK_LADDR_LEAF = 3, // should only be used for unittests
OMAP_INNER = 4,
OMAP_LEAF = 5,
ONODE_BLOCK_STAGED = 6,
COLL_BLOCK = 7,
OBJECT_DATA_BLOCK = 8,
RETIRED_PLACEHOLDER = 9,
ROOT_META = 4,
OMAP_INNER = 5,
OMAP_LEAF = 6,
ONODE_BLOCK_STAGED = 7,
COLL_BLOCK = 8,
OBJECT_DATA_BLOCK = 9,
RETIRED_PLACEHOLDER = 10,
// the following two types are not extent types,
// they are just used to indicate paddr allocation deltas
ALLOC_INFO = 10,
JOURNAL_TAIL = 11,
ALLOC_INFO = 11,
JOURNAL_TAIL = 12,
// Test Block Types
TEST_BLOCK = 12,
TEST_BLOCK_PHYSICAL = 13,
BACKREF_INTERNAL = 14,
BACKREF_LEAF = 15,
TEST_BLOCK = 13,
TEST_BLOCK_PHYSICAL = 14,
BACKREF_INTERNAL = 15,
BACKREF_LEAF = 16,
// None and the number of valid extent_types_t
NONE = 16,
NONE = 17,
};
using extent_types_le_t = uint8_t;
constexpr auto EXTENT_TYPES_MAX = static_cast<uint8_t>(extent_types_t::NONE);
Expand All @@ -1409,12 +1410,12 @@ constexpr bool is_data_type(extent_types_t type) {
}

constexpr bool is_logical_metadata_type(extent_types_t type) {
return type >= extent_types_t::OMAP_INNER &&
return type >= extent_types_t::ROOT_META &&
type <= extent_types_t::COLL_BLOCK;
}

constexpr bool is_logical_type(extent_types_t type) {
if ((type >= extent_types_t::OMAP_INNER &&
if ((type >= extent_types_t::ROOT_META &&
type <= extent_types_t::OBJECT_DATA_BLOCK) ||
type == extent_types_t::TEST_BLOCK) {
assert(is_logical_metadata_type(type) ||
Expand Down Expand Up @@ -1926,44 +1927,18 @@ using backref_root_t = phy_tree_root_t;
* TODO: generalize this to permit more than one lba_manager implementation
*/
struct __attribute__((packed)) root_t {
using meta_t = std::map<std::string, std::string>;

static constexpr int MAX_META_LENGTH = 1024;

backref_root_t backref_root;
lba_root_t lba_root;
laddr_le_t onode_root;
coll_root_le_t collection_root;
laddr_le_t meta;

char meta[MAX_META_LENGTH];

root_t() {
set_meta(meta_t{});
}
root_t() = default;

void adjust_addrs_from_base(paddr_t base) {
lba_root.adjust_addrs_from_base(base);
backref_root.adjust_addrs_from_base(base);
}

meta_t get_meta() {
bufferlist bl;
bl.append(ceph::buffer::create_static(MAX_META_LENGTH, meta));
meta_t ret;
auto iter = bl.cbegin();
decode(ret, iter);
return ret;
}

void set_meta(const meta_t &m) {
ceph::bufferlist bl;
encode(m, bl);
ceph_assert(bl.length() < MAX_META_LENGTH);
bl.rebuild();
auto &bptr = bl.front();
::memset(meta, 0, MAX_META_LENGTH);
::memcpy(meta, bptr.c_str(), bl.length());
}
};

struct alloc_blk_t {
Expand Down
2 changes: 2 additions & 0 deletions src/crimson/os/seastore/transaction_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ TransactionManager::mkfs_ertr::future<> TransactionManager::mkfs()
return lba_manager->mkfs(t);
}).si_then([this, &t] {
return backref_manager->mkfs(t);
}).si_then([this, &t] {
return init_root_meta(t);
}).si_then([this, FNAME, &t] {
INFOT("submitting mkfs transaction", t);
return submit_transaction_direct(t);
Expand Down
61 changes: 45 additions & 16 deletions src/crimson/os/seastore/transaction_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "crimson/os/seastore/logging.h"
#include "crimson/os/seastore/seastore_types.h"
#include "crimson/os/seastore/cache.h"
#include "crimson/os/seastore/root_meta.h"
#include "crimson/os/seastore/lba_manager.h"
#include "crimson/os/seastore/backref_manager.h"
#include "crimson/os/seastore/journal.h"
Expand Down Expand Up @@ -303,10 +304,6 @@ class TransactionManager : public ExtentCallbackInterface {
len,
placement_hint,
INIT_GENERATION);
if (!ext) {
SUBERRORT(seastore_tm, "insufficient space!", t);
return crimson::ct_error::enospc::make();
}
return lba_manager->alloc_extent(
t,
laddr_hint,
Expand Down Expand Up @@ -342,10 +339,6 @@ class TransactionManager : public ExtentCallbackInterface {
len,
placement_hint,
INIT_GENERATION);
if (exts.empty()) {
SUBERRORT(seastore_tm, "insufficient space!", t);
return crimson::ct_error::enospc::make();
}
return lba_manager->alloc_extents(
t,
laddr_hint,
Expand Down Expand Up @@ -690,9 +683,11 @@ class TransactionManager : public ExtentCallbackInterface {
const std::string &key) {
return cache->get_root(
t
).si_then([&key, &t](auto root) {
).si_then([&t, this](auto root) {
return read_extent<RootMetaBlock>(t, root->root.meta);
}).si_then([key, &t](auto mblock) {
LOG_PREFIX(TransactionManager::read_root_meta);
auto meta = root->root.get_meta();
auto meta = mblock->get_meta();
auto iter = meta.find(key);
if (iter == meta.end()) {
SUBDEBUGT(seastore_tm, "{} -> nullopt", t, key);
Expand All @@ -701,7 +696,35 @@ class TransactionManager : public ExtentCallbackInterface {
SUBDEBUGT(seastore_tm, "{} -> {}", t, key, iter->second);
return seastar::make_ready_future<read_root_meta_bare>(iter->second);
}
});
}).handle_error_interruptible(
crimson::ct_error::input_output_error::pass_further{},
crimson::ct_error::assert_all{"unexpected error!"}
);
}

/**
 * init_root_meta
 *
 * Allocates a RootMetaBlock of RootMetaBlock::SIZE bytes (L_ADDR_MIN is
 * passed as the laddr argument), stores an empty meta map in it, and
 * records the block's laddr in root.meta of a mutable copy of the root
 * block.
 *
 * input_output_error is passed further to the caller; any other error
 * hits the assert_all handler.
 */
using init_root_meta_iertr = base_iertr;
using init_root_meta_ret = init_root_meta_iertr::future<>;
init_root_meta_ret init_root_meta(Transaction &t) {
  return alloc_non_data_extent<RootMetaBlock>(
    t, L_ADDR_MIN, RootMetaBlock::SIZE
  ).si_then([this, &t](auto meta_block) {
    // the freshly allocated block starts out with an empty map
    meta_block->set_meta(RootMetaBlock::meta_t{});
    return cache->get_root(
      t
    ).si_then([this, &t, meta_block](auto root_block) {
      // the root block must be duplicated for write before it can be
      // pointed at the new meta block
      auto writable = cache->duplicate_for_write(t, root_block);
      auto mutable_root = writable->template cast<RootBlock>();
      mutable_root->root.meta = meta_block->get_laddr();
      return seastar::now();
    });
  }).handle_error_interruptible(
    crimson::ct_error::input_output_error::pass_further{},
    crimson::ct_error::assert_all{"unexpected error!"}
  );
}

/**
Expand All @@ -719,15 +742,21 @@ class TransactionManager : public ExtentCallbackInterface {
SUBDEBUGT(seastore_tm, "seastore_tm, {} -> {} ...", t, key, value);
return cache->get_root(
t
).si_then([this, &t, &key, &value](RootBlockRef root) {
root = cache->duplicate_for_write(t, root)->cast<RootBlock>();
).si_then([this, &t](RootBlockRef root) {
return read_extent<RootMetaBlock>(t, root->root.meta);
}).si_then([this, key, value, &t](auto mblock) {
mblock = get_mutable_extent(t, mblock
)->template cast<RootMetaBlock>();

auto meta = root->root.get_meta();
auto meta = mblock->get_meta();
meta[key] = value;

root->root.set_meta(meta);
mblock->set_meta(meta);
return seastar::now();
});
}).handle_error_interruptible(
crimson::ct_error::input_output_error::pass_further{},
crimson::ct_error::assert_all{"unexpected error!"}
);
}

/**
Expand Down
Loading

0 comments on commit 3a35aeb

Please sign in to comment.