
Commit

Make BatchIt optional

Move ring set bits and associativity knobs to allocconfig and expose them via
CMake.  If associativity is zero, use non-batched implementations of the
RemoteMessage and RemoteDeallocCacheBatching classes.

By default, turn BatchIt on when there is enough room in the minimum allocation
size to do so.  Exactly how much space is enough depends on which mitigations
are enabled and whether we are compiling with C++20.
nwf-msr committed Jun 19, 2024
1 parent eb92e95 commit c2da3a6
Showing 5 changed files with 166 additions and 31 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
@@ -65,6 +65,9 @@ endif()
set(SNMALLOC_MIN_ALLOC_SIZE "" CACHE STRING "Minimum allocation bytes (power of 2)")
set(SNMALLOC_MIN_ALLOC_STEP_SIZE "" CACHE STRING "Minimum allocation step (power of 2)")

set(SNMALLOC_DEALLOC_BATCH_RING_ASSOC "" CACHE STRING "Associativity of deallocation batch cache; 0 to disable")
set(SNMALLOC_DEALLOC_BATCH_RING_SET_BITS "" CACHE STRING "Logarithm of number of deallocation batch cache associativity sets")

if(MSVC AND SNMALLOC_STATIC_LIBRARY AND (SNMALLOC_STATIC_LIBRARY_PREFIX STREQUAL ""))
message(FATAL_ERROR "Empty static library prefix not supported on MSVC")
endif()
@@ -248,6 +251,8 @@ if (SNMALLOC_NO_REALLOCARR)
endif()
add_as_define_value(SNMALLOC_MIN_ALLOC_SIZE)
add_as_define_value(SNMALLOC_MIN_ALLOC_STEP_SIZE)
add_as_define_value(SNMALLOC_DEALLOC_BATCH_RING_ASSOC)
add_as_define_value(SNMALLOC_DEALLOC_BATCH_RING_SET_BITS)

target_compile_definitions(snmalloc INTERFACE $<$<BOOL:CONST_QUALIFIED_MALLOC_USABLE_SIZE>:MALLOC_USABLE_SIZE_QUALIFIER=const>)

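As a sketch of how the new cache variables might be set at configure time (the
build directory and any other options here are placeholders, not part of this
commit):

# Disable BatchIt entirely; snmalloc falls back to the non-batched
# remote-message and remote-dealloc-cache implementations.
cmake -B build -DSNMALLOC_DEALLOC_BATCH_RING_ASSOC=0

# Size the batching cache explicitly: 4 ways per set and 2^4 = 16 sets.
cmake -B build -DSNMALLOC_DEALLOC_BATCH_RING_ASSOC=4 -DSNMALLOC_DEALLOC_BATCH_RING_SET_BITS=4
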
31 changes: 31 additions & 0 deletions src/snmalloc/ds/allocconfig.h
@@ -116,6 +116,37 @@ namespace snmalloc
static constexpr size_t REMOTE_SLOTS = 1 << REMOTE_SLOT_BITS;
static constexpr size_t REMOTE_MASK = REMOTE_SLOTS - 1;

static constexpr size_t DEALLOC_BATCH_RING_ASSOC =
#if defined(SNMALLOC_DEALLOC_BATCH_RING_ASSOC)
SNMALLOC_DEALLOC_BATCH_RING_ASSOC;
#else
# ifdef SNMALLOC_USE_CXX17
// For C++17, we don't have [[no_unique_address]] and so we always end up
// needing all four pointers' worth of space (because BatchedRemoteMessage
// has two freelist::Object::T<> links within, each of which will have two
// fields and will be padded to two pointers).
(MIN_ALLOC_SIZE >= 4 * sizeof(void*))
# else
// For C++20 or later, we do have [[no_unique_address]] and so can also do
// batching if we aren't turning on the backward-pointer mitigations
(MIN_ALLOC_SIZE >= 4 * sizeof(void*) ||
!mitigations(freelist_backward_edge))
# endif
?
2 :
0;
#endif
static constexpr size_t DEALLOC_BATCH_RING_SET_BITS =
#if defined(SNMALLOC_DEALLOC_BATCH_RING_SET_BITS)
SNMALLOC_DEALLOC_BATCH_RING_SET_BITS
#else
3
#endif
;

static constexpr size_t DEALLOC_BATCH_RINGS =
DEALLOC_BATCH_RING_ASSOC * bits::one_at_bit(DEALLOC_BATCH_RING_SET_BITS);

static_assert(
INTERMEDIATE_BITS < MIN_ALLOC_STEP_BITS,
"INTERMEDIATE_BITS must be less than MIN_ALLOC_BITS");
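
To make the defaults above concrete: when the batched path is selected, an
associativity of 2 and 3 set bits give 2 * 2^3 = 16 rings in total.  A minimal
standalone sketch of that arithmetic (local names, not snmalloc code):

#include <cstddef>

// Mirrors the default sizing: 2 ways per set, 2^3 = 8 sets, 16 rings total.
constexpr std::size_t kAssoc = 2;   // DEALLOC_BATCH_RING_ASSOC default
constexpr std::size_t kSetBits = 3; // DEALLOC_BATCH_RING_SET_BITS default
constexpr std::size_t kRings = kAssoc * (std::size_t{1} << kSetBits);
static_assert(kRings == 16, "2 ways x 8 sets");
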
4 changes: 2 additions & 2 deletions src/snmalloc/mem/freelist.h
@@ -40,7 +40,7 @@

namespace snmalloc
{
class RemoteMessage;
class BatchedRemoteMessage;

static constexpr address_t NO_KEY_TWEAK = 0;

@@ -134,7 +134,7 @@ namespace snmalloc

friend class Object;

friend class ::snmalloc::RemoteMessage;
friend class ::snmalloc::BatchedRemoteMessage;

class Empty
{
108 changes: 90 additions & 18 deletions src/snmalloc/mem/remoteallocator.h
@@ -26,22 +26,22 @@ namespace snmalloc
* address space). This gives us enough room to pack in the length of the
* ring, without needing to grow the structure.
*/
class RemoteMessage
class BatchedRemoteMessage
{
friend class RemoteMessageAssertions;
friend class BatchedRemoteMessageAssertions;

freelist::Object::T<> free_ring;
freelist::Object::T<> message_link;

static_assert(
sizeof(free_ring.next_object) >= sizeof(void*),
"RemoteMessage bitpacking needs sizeof(void*) in next_object");
"BatchedRemoteMessage bitpacking needs sizeof(void*) in next_object");

public:
static auto emplace_in_alloc(capptr::Alloc<void> alloc)
{
return CapPtr<RemoteMessage, capptr::bounds::Alloc>::unsafe_from(
new (alloc.unsafe_ptr()) RemoteMessage());
return CapPtr<BatchedRemoteMessage, capptr::bounds::Alloc>::unsafe_from(
new (alloc.unsafe_ptr()) BatchedRemoteMessage());
}

static auto mk_from_freelist_builder(
@@ -57,8 +57,9 @@ namespace snmalloc

// Preserve the last node's backpointer and change its type.
auto last_prev = last->prev;
auto self = CapPtr<RemoteMessage, capptr::bounds::Alloc>::unsafe_from(
new (last.unsafe_ptr()) RemoteMessage());
auto self =
CapPtr<BatchedRemoteMessage, capptr::bounds::Alloc>::unsafe_from(
new (last.unsafe_ptr()) BatchedRemoteMessage());
self->free_ring.prev = last_prev;

// XXX On CHERI, we could do a fair bit better if we had a primitive for
@@ -78,25 +79,27 @@ namespace snmalloc
return self;
}

static freelist::HeadPtr to_message_link(capptr::Alloc<RemoteMessage> m)
static freelist::HeadPtr
to_message_link(capptr::Alloc<BatchedRemoteMessage> m)
{
return pointer_offset(m, offsetof(RemoteMessage, message_link))
return pointer_offset(m, offsetof(BatchedRemoteMessage, message_link))
.as_reinterpret<freelist::Object::T<>>();
}

static capptr::Alloc<RemoteMessage>
static capptr::Alloc<BatchedRemoteMessage>
from_message_link(freelist::HeadPtr chainPtr)
{
return pointer_offset_signed(
chainPtr,
-static_cast<ptrdiff_t>(offsetof(RemoteMessage, message_link)))
.as_reinterpret<RemoteMessage>();
-static_cast<ptrdiff_t>(
offsetof(BatchedRemoteMessage, message_link)))
.as_reinterpret<BatchedRemoteMessage>();
}

template<SNMALLOC_CONCEPT(IsConfigLazy) Config, typename Domesticator_queue>
SNMALLOC_FAST_PATH static std::pair<freelist::HeadPtr, uint16_t>
open_free_ring(
capptr::Alloc<RemoteMessage> m,
capptr::Alloc<BatchedRemoteMessage> m,
size_t objsize,
const FreeListKey& key,
address_t key_tweak,
@@ -142,7 +145,7 @@

template<SNMALLOC_CONCEPT(IsConfigLazy) Config, typename Domesticator_queue>
static uint16_t ring_size(
capptr::Alloc<RemoteMessage> m,
capptr::Alloc<BatchedRemoteMessage> m,
const FreeListKey& key,
address_t key_tweak,
Domesticator_queue domesticate)
@@ -183,16 +186,85 @@
}
};

class RemoteMessageAssertions
class BatchedRemoteMessageAssertions
{
static_assert(sizeof(RemoteMessage) <= MIN_ALLOC_SIZE);
static_assert(offsetof(RemoteMessage, free_ring) == 0);
static_assert(
(DEALLOC_BATCH_RINGS == 0) ||
(sizeof(SingletonRemoteMessage) <= MIN_ALLOC_SIZE));
static_assert(offsetof(BatchedRemoteMessage, free_ring) == 0);

static_assert(
MAX_SLAB_SPAN_BITS + MAX_CAPACITY_BITS < 8 * sizeof(void*),
(DEALLOC_BATCH_RINGS == 0) ||
(MAX_SLAB_SPAN_BITS + MAX_CAPACITY_BITS < 8 * sizeof(void*)),
"Ring bit-stuffing trick can't reach far enough to enclose a slab");
};

class SingletonRemoteMessage
{
friend class SingletonRemoteMessageAssertions;

freelist::Object::T<> message_link;

public:
static auto emplace_in_alloc(capptr::Alloc<void> alloc)
{
return CapPtr<SingletonRemoteMessage, capptr::bounds::Alloc>::unsafe_from(
new (alloc.unsafe_ptr()) SingletonRemoteMessage());
}

static freelist::HeadPtr
to_message_link(capptr::Alloc<SingletonRemoteMessage> m)
{
return pointer_offset(m, offsetof(SingletonRemoteMessage, message_link))
.as_reinterpret<freelist::Object::T<>>();
}

static capptr::Alloc<SingletonRemoteMessage>
from_message_link(freelist::HeadPtr chainPtr)
{
return pointer_offset_signed(
chainPtr,
-static_cast<ptrdiff_t>(
offsetof(SingletonRemoteMessage, message_link)))
.as_reinterpret<SingletonRemoteMessage>();
}

template<SNMALLOC_CONCEPT(IsConfigLazy) Config, typename Domesticator_queue>
SNMALLOC_FAST_PATH static std::pair<freelist::HeadPtr, uint16_t>
open_free_ring(
capptr::Alloc<SingletonRemoteMessage> m,
size_t,
const FreeListKey&,
address_t,
Domesticator_queue)
{
return {m.as_reinterpret<freelist::Object::T<>>(), 1};
}

template<SNMALLOC_CONCEPT(IsConfigLazy) Config, typename Domesticator_queue>
static uint16_t ring_size(
capptr::Alloc<SingletonRemoteMessage>,
const FreeListKey&,
address_t,
Domesticator_queue)
{
return 1;
}
};

class SingletonRemoteMessageAssertions
{
static_assert(sizeof(SingletonRemoteMessage) <= MIN_ALLOC_SIZE);
static_assert(offsetof(SingletonRemoteMessage, message_link) == 0);
};

using RemoteMessage = std::conditional_t<
(DEALLOC_BATCH_RINGS > 0),
BatchedRemoteMessage,
SingletonRemoteMessage>;

static_assert(sizeof(RemoteMessage) <= MIN_ALLOC_SIZE);

/**
* A RemoteAllocator is the message queue of freed objects. It builds on the
* FreeListMPSCQ but encapsulates knowledge that the objects are actually
49 changes: 38 additions & 11 deletions src/snmalloc/mem/remotecache.h
@@ -12,14 +12,13 @@

namespace snmalloc
{
template<typename Config>
/**
* Stores the remote deallocation to batch them before sending
*/
template<typename Config, size_t RINGS>
class RemoteDeallocCacheBatching
{
static constexpr size_t RING_ASSOC = 2;
static constexpr size_t RING_SET_BITS = 3;

static constexpr size_t RINGS =
RING_ASSOC * bits::one_at_bit(RING_SET_BITS);
static_assert(RINGS > 0);

std::array<freelist::Builder<false, true>, RINGS> open_builder;
std::array<typename Config::PagemapEntry::SlabMetadata*, RINGS> open_meta =
@@ -30,13 +29,13 @@
{
// See https://github.com/skeeto/hash-prospector for choice of constant
return ((meta->as_key_tweak() * 0x7EFB352D) >> 16) &
bits::mask_bits(RING_SET_BITS);
bits::mask_bits(DEALLOC_BATCH_RING_SET_BITS);
}

template<typename Forward>
SNMALLOC_FAST_PATH void close_one_pending(Forward forward, size_t ix)
{
auto rmsg = RemoteMessage::mk_from_freelist_builder(
auto rmsg = BatchedRemoteMessage::mk_from_freelist_builder(
open_builder[ix],
freelist::Object::key_root,
open_meta[ix]->as_key_tweak());
@@ -65,7 +64,7 @@
{
size_t ix_set = ring_set(meta);

for (size_t ix_way = 0; ix_way < RING_ASSOC; ix_way++)
for (size_t ix_way = 0; ix_way < DEALLOC_BATCH_RING_ASSOC; ix_way++)
{
size_t ix = ix_set + ix_way;
if (meta == open_meta[ix])
@@ -78,7 +77,7 @@

size_t victim_ix = ix_set;
size_t victim_size = 0;
for (size_t ix_way = 0; ix_way < RING_ASSOC; ix_way++)
for (size_t ix_way = 0; ix_way < DEALLOC_BATCH_RING_ASSOC; ix_way++)
{
size_t ix = ix_set + ix_way;
if (open_meta[ix] == nullptr)
@@ -124,6 +123,34 @@
}
};

template<typename Config>
struct RemoteDeallocCacheNoBatching
{
void init() {}

template<typename Forward>
void close_all(Forward)
{}

template<typename Forward>
SNMALLOC_FAST_PATH void dealloc(
typename Config::PagemapEntry::SlabMetadata*,
freelist::HeadPtr r,
Forward forward)
{
auto& entry = Config::Backend::get_metaentry(address_cast(r));
forward(
entry.get_remote()->trunc_id(),
SingletonRemoteMessage::emplace_in_alloc(r.as_void()));
}
};

template<typename Config>
using RemoteDeallocCacheBatchingImpl = std::conditional_t<
(DEALLOC_BATCH_RINGS > 0),
RemoteDeallocCacheBatching<Config, DEALLOC_BATCH_RINGS>,
RemoteDeallocCacheNoBatching<Config>>;

/**
* Stores the remote deallocation to batch them before sending
*/
@@ -132,7 +159,7 @@
{
std::array<freelist::Builder<false>, REMOTE_SLOTS> list;

RemoteDeallocCacheBatching<Config> batching;
RemoteDeallocCacheBatchingImpl<Config> batching;

/**
* The total amount of memory we are waiting for before we will dispatch
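
For reference, a rough standalone sketch of how the batching cache picks a ring
set for a slab's metadata, mirroring RemoteDeallocCacheBatching::ring_set above
(local names; key_tweak stands in for meta->as_key_tweak() in the real code):

#include <cstddef>
#include <cstdint>

constexpr std::size_t kSetBits = 3; // DEALLOC_BATCH_RING_SET_BITS default

// Multiply-shift hash (constant from hash-prospector), masked to a set index.
inline std::size_t ring_set(std::uintptr_t key_tweak)
{
  return ((key_tweak * 0x7EFB352D) >> 16) & ((std::size_t{1} << kSetBits) - 1);
}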
