Implement MCS Combining lock #666

Merged 4 commits on Jun 28, 2024
Changes from all commits
42 changes: 23 additions & 19 deletions src/snmalloc/backend/globalconfig.h
@@ -96,33 +96,37 @@ namespace snmalloc
     // of allocators.
     SNMALLOC_SLOW_PATH static void ensure_init_slow()
     {
-      FlagLock lock{initialisation_lock};
+      if (initialised)
+        return;
+
+      with(initialisation_lock, [&]() {
 #ifdef SNMALLOC_TRACING
-      message<1024>("Run init_impl");
+        message<1024>("Run init_impl");
 #endif
 
-      if (initialised)
-        return;
+        if (initialised)
+          return;
 
-      LocalEntropy entropy;
-      entropy.init<Pal>();
-      // Initialise key for remote deallocation lists
-      RemoteAllocator::key_global = FreeListKey(entropy.get_free_list_key());
+        LocalEntropy entropy;
+        entropy.init<Pal>();
+        // Initialise key for remote deallocation lists
+        RemoteAllocator::key_global = FreeListKey(entropy.get_free_list_key());
 
-      // Need to randomise pagemap location. If requested and not a
-      // StrictProvenance architecture, randomize its table's location within a
-      // significantly larger address space allocation.
-      static constexpr bool pagemap_randomize =
-        mitigations(random_pagemap) && !aal_supports<StrictProvenance>;
+        // Need to randomise pagemap location. If requested and not a
+        // StrictProvenance architecture, randomize its table's location within
+        // a significantly larger address space allocation.
+        static constexpr bool pagemap_randomize =
+          mitigations(random_pagemap) && !aal_supports<StrictProvenance>;
 
-      Pagemap::concretePagemap.template init<pagemap_randomize>();
+        Pagemap::concretePagemap.template init<pagemap_randomize>();
 
-      if constexpr (aal_supports<StrictProvenance>)
-      {
-        Authmap::init();
-      }
+        if constexpr (aal_supports<StrictProvenance>)
+        {
+          Authmap::init();
+        }
 
-      initialised.store(true, std::memory_order_release);
+        initialised.store(true, std::memory_order_release);
+      });
     }
 
   public:
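For readers tracking the idiom change: the hunk above swaps an RAII guard (FlagLock lock{...}) for the with(lock, thunk) helper and adds a double-checked initialised test. The following is a minimal, self-contained sketch of the same shape, not snmalloc's implementation; SpinLock, init_lock, and do_init are illustrative stand-ins. One subtlety worth making explicit: a return inside the thunk exits only the lambda, so the inner early-out behaves like the early return of the original RAII version.

#include <atomic>
#include <utility>

// Illustrative with() helper: run a thunk while holding the lock.
template<typename Lock, typename F>
inline void with(Lock& lock, F&& f)
{
  lock.lock();
  std::forward<F>(f)();
  lock.unlock();
}

// Minimal test-and-set spin lock, standing in for snmalloc's locks.
struct SpinLock
{
  std::atomic_flag flag = ATOMIC_FLAG_INIT;

  void lock()
  {
    while (flag.test_and_set(std::memory_order_acquire))
      ;
  }

  void unlock()
  {
    flag.clear(std::memory_order_release);
  }
};

std::atomic<bool> initialised{false};
SpinLock init_lock;

void do_init() {} // hypothetical one-time setup

void ensure_init()
{
  // First check: skip the lock entirely once initialised.
  if (initialised.load(std::memory_order_acquire))
    return;

  with(init_lock, [&]() {
    // Second check: another thread may have initialised while we
    // waited for the lock. This return exits only the thunk.
    if (initialised.load(std::memory_order_acquire))
      return;

    do_init();
    initialised.store(true, std::memory_order_release);
  });
}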
14 changes: 9 additions & 5 deletions src/snmalloc/backend_helpers/lockrange.h
@@ -22,7 +22,7 @@ namespace snmalloc
      * This is infrequently used code, a spin lock simplifies the code
      * considerably, and should never be on the fast path.
      */
-    FlagWord spin_lock{};
+    CombiningLock spin_lock{};
 
   public:
     static constexpr bool Aligned = ParentRange::Aligned;
@@ -35,14 +35,18 @@
 
     CapPtr<void, ChunkBounds> alloc_range(size_t size)
    {
-      FlagLock lock(spin_lock);
-      return parent.alloc_range(size);
+      CapPtr<void, ChunkBounds> result;
+      with(spin_lock, [&]() {
+        {
+          result = parent.alloc_range(size);
+        }
+      });
+      return result;
     }
 
     void dealloc_range(CapPtr<void, ChunkBounds> base, size_t size)
     {
-      FlagLock lock(spin_lock);
-      parent.dealloc_range(base, size);
+      with(spin_lock, [&]() { parent.dealloc_range(base, size); });
     }
   };
 };
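Since with() returns void, alloc_range now moves its result out of the critical section through a captured local. A sketch of that pattern, reusing the illustrative SpinLock and with() stand-ins from the sketch above (compute is likewise hypothetical):

int compute() { return 42; } // hypothetical guarded operation

int compute_locked(SpinLock& lock)
{
  int result{};
  // The thunk writes the captured local while holding the lock;
  // the caller reads it after the lock has been released.
  with(lock, [&]() { result = compute(); });
  return result;
}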
224 changes: 224 additions & 0 deletions src/snmalloc/ds/combininglock.h
@@ -0,0 +1,224 @@
#pragma once

#include "../aal/aal.h"
#include "../pal/pal.h"

#include <atomic>
#include <functional>

namespace snmalloc
{
class CombineLockNode;

struct CombiningLock
{
// Fast path lock in case there is no contention.
std::atomic<bool> flag{false};

// MCS queue of work items
std::atomic<CombineLockNode*> head{nullptr};
};

/**
 * @brief Combination of an MCS queue lock with Flat Combining.
 *
 * Each element in the queue has a pointer to a work item.
 * This means that, under contention, the thread holding the lock
 * can perform the work.
 *
 * As the work items are arbitrary lambdas, there are no simplifications
 * for combining related work items. E.g. the original Flat Combining
 * paper might sort a collection of inserts and perform them in a single
 * traversal.
 *
 * Note that we should perhaps add a Futex/WakeOnAddress mode to improve
 * performance in the contended case, rather than spinning.
 */

[Review comment · Contributor, on the note above] Could perhaps templatize the class on <bool notify = FALSE> or such?
class CombineLockNode
{
template<typename F>
friend class CombineLockNodeTempl;

enum class LockStatus
{
// The work for this node has not been completed.
WAITING,

// The work for this thread has been completed, and it is not the
// last element in the queue.
DONE,

// The work for this thread has not been completed, and it is the
// head of the queue.
READY
};

// Status of this node, set by the thread at the head of the queue
// when it either makes the thread for this node the head of the
// queue or completes its work.
std::atomic<LockStatus> status{LockStatus::WAITING};

// Used to store the queue
std::atomic<CombineLockNode*> next{nullptr};

// Stores a type-erased pointer to the work item (a C++ lambda)
// associated with this node in the queue.
void (*f_raw)(CombineLockNode*);

void release(CombiningLock& lock)
{
lock.flag.store(false, std::memory_order_release);
}

void set_status(LockStatus s)
{
status.store(s, std::memory_order_release);
}

constexpr CombineLockNode(void (*f)(CombineLockNode*)) : f_raw(f) {}

SNMALLOC_FAST_PATH void attach(CombiningLock& lock)
{
// Test if no one is waiting
if (lock.head.load(std::memory_order_relaxed) == nullptr)
[Review comment · Collaborator] Maybe it is beneficial to separate this region out as a fast path subject to being inlined. The remaining parts can be put into an attach_slow function.

[Reply · Member, author] I don't think this code is used here enough to warrant that, but maybe I should for future use.
{
// No one was waiting, so contention is low. Attempt to acquire the
// flag lock.
if (lock.flag.exchange(true, std::memory_order_acquire) == false)
{
// We grabbed the lock.
f_raw(this);

// Release the lock
release(lock);
return;
}
}
attach_slow(lock);
}

SNMALLOC_SLOW_PATH void attach_slow(CombiningLock& lock)
{
// There is contention for the lock; we need to add our work to the
// queue of pending work.
auto prev = lock.head.exchange(this, std::memory_order_acq_rel);

if (prev != nullptr)
{
// If we aren't the head, link into predecessor
prev->next.store(this, std::memory_order_release);

// Wait for predecessor to complete
while (status.load(std::memory_order_relaxed) == LockStatus::WAITING)
Aal::pause();

// Determine if another thread completed our work.
if (status.load(std::memory_order_acquire) == LockStatus::DONE)
return;
}
else
{
// We are the head of the queue. Spin until we acquire the fast path
// lock. As we are in the queue, future requests shouldn't try to
// acquire the fast path lock, but stale views of the queue being
// empty could still be concurrent with this thread.
while (lock.flag.exchange(true, std::memory_order_acquire))
{
while (lock.flag.load(std::memory_order_relaxed))
{
Aal::pause();
}
}

// We could set
//   status = LockStatus::READY
// However, the subsequent code assumes it is READY, and
// nothing would read it.
}

// We are the head of the queue, and responsible for
// waking/performing our and subsequent work.
auto curr = this;
while (true)
{
// Perform work for head of the queue
curr->f_raw(curr);

// Determine if there are more elements.
auto n = curr->next.load(std::memory_order_acquire);
if (n != nullptr)
{
// Signal this work was completed and move on to
// next item.
curr->set_status(LockStatus::DONE);
curr = n;
continue;
}

// This could be the end of the queue; attempt to close the
// queue.
auto curr_c = curr;
if (lock.head.compare_exchange_strong(
curr_c,
nullptr,
std::memory_order_release,
std::memory_order_relaxed))
{
// Queue was successfully closed.
// Notify last element the work was completed.
curr->set_status(LockStatus::DONE);
release(lock);
return;
}

// Failed to close the queue; wait for the next thread to be
// added.
while (curr->next.load(std::memory_order_relaxed) == nullptr)
Aal::pause();

// As we had to wait, give the job to the next thread
// to carry on performing the work.
n = curr->next.load(std::memory_order_acquire);
n->set_status(LockStatus::READY);

// Notify the thread that we completed its work.
// Note that this needs to be done last, as we can't read
// curr->next after setting curr->status
curr->set_status(LockStatus::DONE);
return;
}
}
};

template<typename F>
class CombineLockNodeTempl : CombineLockNode
{
template<typename FF>
friend void with(CombiningLock&, FF&&);

// This holds the closure for the lambda
F f;

// Untyped version of calling f to store in the node.
static void invoke(CombineLockNode* self)
{
auto self_templ = reinterpret_cast<CombineLockNodeTempl*>(self);
self_templ->f();
}

CombineLockNodeTempl(CombiningLock& lock, F&& f_)
: CombineLockNode(invoke), f(f_)
{
attach(lock);
}
};

/**
* Lock primitive. This takes a reference to a Lock, and a thunk to
* call when the lock is available. The thunk should be independent of
* the current thread as the thunk may be executed by a different thread.
*/
template<typename F>
inline void with(CombiningLock& lock, F&& f)
{
CombineLockNodeTempl<F> node{lock, std::forward<F>(f)};
}
} // namespace snmalloc
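A small usage sketch for the new primitive, assuming snmalloc's include layout; the global counter, thread count, and iteration count are illustrative. As the comment on with() above says, the thunk may be executed by a different thread, so it must not depend on thread identity (e.g. thread-local state):

#include <snmalloc/ds/combininglock.h>

#include <thread>
#include <vector>

snmalloc::CombiningLock lock;
size_t counter = 0; // guarded by `lock`

int main()
{
  std::vector<std::thread> threads;
  for (int i = 0; i < 8; i++)
  {
    threads.emplace_back([]() {
      for (int j = 0; j < 100000; j++)
      {
        // Under contention, the thread at the head of the MCS queue may
        // run this thunk on our behalf.
        snmalloc::with(lock, []() { counter++; });
      }
    });
  }

  for (auto& t : threads)
    t.join();

  // The combining lock serialised all increments: counter == 800000.
  return counter == 800000 ? 0 : 1;
}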
1 change: 1 addition & 0 deletions src/snmalloc/ds/ds.h
@@ -6,6 +6,7 @@
 #include "../pal/pal.h"
 #include "aba.h"
 #include "allocconfig.h"
+#include "combininglock.h"
 #include "entropy.h"
 #include "flaglock.h"
 #include "mpmcstack.h"
7 changes: 7 additions & 0 deletions src/snmalloc/ds/flaglock.h
@@ -133,4 +133,11 @@ namespace snmalloc
       lock.flag.store(false, std::memory_order_release);
     }
   };
+
+  template<typename F>
+  inline void with(FlagWord& lock, F&& f)
+  {
+    FlagLock l(lock);
+    f();
+  }
 } // namespace snmalloc

[Review comment · Contributor, on the with() overload] Not when? ;)
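With this overload in place, FlagWord and the new CombiningLock expose the same with() shape, so call sites can be written generically over the lock type. A sketch under that assumption (the bump helper is illustrative, not part of snmalloc):

#include <snmalloc/ds/combininglock.h>
#include <snmalloc/ds/flaglock.h>

// Instantiates against either FlagWord or CombiningLock, since both now
// provide a matching with() overload in namespace snmalloc.
template<typename LockT>
void bump(LockT& lock, size_t& counter)
{
  snmalloc::with(lock, [&]() { counter++; });
}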
17 changes: 9 additions & 8 deletions src/snmalloc/ds/singleton.h
@@ -35,14 +35,15 @@ namespace snmalloc
 
       if (SNMALLOC_UNLIKELY(!initialised.load(std::memory_order_acquire)))
       {
-        FlagLock lock(flag);
-        if (!initialised)
-        {
-          init(&obj);
-          initialised.store(true, std::memory_order_release);
-          if (first != nullptr)
-            *first = true;
-        }
+        with(flag, [&]() {
+          if (!initialised)
+          {
+            init(&obj);
+            initialised.store(true, std::memory_order_release);
+            if (first != nullptr)
+              *first = true;
+          }
+        });
       }
       return obj;
     }