Skip to content

Commit

Permalink
Startup speed (#665)
Browse files Browse the repository at this point in the history
* Refactor buddy allocator

Make the structure of add_block clearer by pulling out remove_buddy.

* Give the buddy allocator a few elements so it doesn't have to touch the pagemap early on.

* Only use do-dump and don't-dump on the pagemap

The do-dump and don't-dump calls were costing a lot during startup of snmalloc. This reduces the number of times they are called, so that they are only used for the pagemap.
  • Loading branch information
mjp41 authored Jun 26, 2024
1 parent 835ab51 commit 4620220
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 74 deletions.
122 changes: 84 additions & 38 deletions src/snmalloc/backend_helpers/buddy.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,13 @@ namespace snmalloc
template<typename Rep, size_t MIN_SIZE_BITS, size_t MAX_SIZE_BITS>
class Buddy
{
std::array<RBTree<Rep>, MAX_SIZE_BITS - MIN_SIZE_BITS> trees{};
// Storage for one size class (one slot of `entries`, selected by
// to_index(size)): a small inline cache of blocks plus a red-black tree.
struct Entry
{
// Three inline block slots. remove_buddy scans these before searching the
// tree, and add_block stores a new block in the first slot that compares
// equal to Rep::null, only falling back to the tree when none is free.
// NOTE(review): no initialiser here; this relies on `entries{}` to
// value-initialise the slots and presumably on that value being Rep::null
// - confirm against Rep.
typename Rep::Contents cache[3];
// Tree that add_block inserts into when every cache slot is occupied.
RBTree<Rep> tree{};
};

std::array<Entry, MAX_SIZE_BITS - MIN_SIZE_BITS> entries{};
// All RBtrees at or above this index should be empty.
size_t empty_at_or_above{0};

Expand All @@ -42,13 +48,54 @@ namespace snmalloc
void invariant()
{
#ifndef NDEBUG
for (size_t i = empty_at_or_above; i < trees.size(); i++)
for (size_t i = empty_at_or_above; i < entries.size(); i++)
{
SNMALLOC_ASSERT(trees[i].is_empty());
SNMALLOC_ASSERT(entries[i].tree.is_empty());
// TODO check cache is empty
}
#endif
}

/**
 * Try to detach the buddy of the block (addr, size) from this allocator so
 * that the caller can merge the two.
 *
 * Returns true when the buddy was held here (in the size class's cache or
 * tree), the representation allows consolidation, and the buddy has been
 * removed. Returns false otherwise, leaving the stored blocks unchanged.
 */
bool remove_buddy(typename Rep::Contents addr, size_t size)
{
  auto level = to_index(size);

  // Nothing is stored at or above `empty_at_or_above`, so there is no
  // buddy to find at this level.
  if (level >= empty_at_or_above)
    return false;

  auto& entry = entries[level];
  auto partner = Rep::buddy(addr, size);

  // Look in the inline cache before touching the tree.
  for (auto& slot : entry.cache)
  {
    if (!Rep::equal(partner, slot))
      continue;

    if (!Rep::can_consolidate(addr, size))
      return false;

    // Refill the vacated slot with the tree's minimum element.
    slot = entry.tree.remove_min();
    return true;
  }

  auto path = entry.tree.get_root_path();
  if (!entry.tree.find(path, partner))
    return false;

  // Consolidation may only be queried once the buddy is known to be in the
  // buddy allocator: the query looks at the buddy's meta-data, which is
  // only known to exist after it has been found in the red-black tree.
  // Asking earlier could segfault.
  if (!Rep::can_consolidate(addr, size))
    return false;

  entry.tree.remove_path(path);
  return true;
}

public:
constexpr Buddy() = default;
/**
Expand All @@ -63,48 +110,39 @@ namespace snmalloc
*/
typename Rep::Contents add_block(typename Rep::Contents addr, size_t size)
{
auto idx = to_index(size);
empty_at_or_above = bits::max(empty_at_or_above, idx + 1);

validate_block(addr, size);

auto buddy = Rep::buddy(addr, size);
if (remove_buddy(addr, size))
{
// Add to next level cache
size *= 2;
addr = Rep::align_down(addr, size);
if (size == bits::one_at_bit(MAX_SIZE_BITS))
{
// Invariant should be checked on all non-tail return paths.
// Holds trivially here with current design.
invariant();
// Too big for this buddy allocator.
return addr;
}
return add_block(addr, size);
}

auto path = trees[idx].get_root_path();
bool contains_buddy = trees[idx].find(path, buddy);
auto idx = to_index(size);
empty_at_or_above = bits::max(empty_at_or_above, idx + 1);

if (contains_buddy)
for (auto& e : entries[idx].cache)
{
// Only check if we can consolidate after we know the buddy is in
// the buddy allocator. This is required to prevent possible segfaults
// from looking at the buddies meta-data, which we only know exists
// once we have found it in the red-black tree.
if (Rep::can_consolidate(addr, size))
if (Rep::equal(Rep::null, e))
{
trees[idx].remove_path(path);

// Add to next level cache
size *= 2;
addr = Rep::align_down(addr, size);
if (size == bits::one_at_bit(MAX_SIZE_BITS))
{
// Invariant should be checked on all non-tail return paths.
// Holds trivially here with current design.
invariant();
// Too big for this buddy allocator.
return addr;
}
return add_block(addr, size);
e = addr;
return Rep::null;
}

// Re-traverse as the path was to the buddy,
// but the representation says we cannot combine.
// We must find the correct place for this element.
// Something clever could be done here, but it's not worth it.
// path = trees[idx].get_root_path();
trees[idx].find(path, addr);
}
trees[idx].insert_path(path, addr);

auto path = entries[idx].tree.get_root_path();
entries[idx].tree.find(path, addr);
entries[idx].tree.insert_path(path, addr);
invariant();
return Rep::null;
}
Expand All @@ -121,7 +159,15 @@ namespace snmalloc
if (idx >= empty_at_or_above)
return Rep::null;

auto addr = trees[idx].remove_min();
auto addr = entries[idx].tree.remove_min();
for (auto& e : entries[idx].cache)
{
if (Rep::equal(Rep::null, addr) || Rep::compare(e, addr))
{
addr = std::exchange(e, addr);
}
}

if (addr != Rep::null)
{
validate_block(addr, size);
Expand Down
10 changes: 10 additions & 0 deletions src/snmalloc/ds/pagemap.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ namespace snmalloc
auto page_end = pointer_align_up<OS_PAGE_SIZE, char>(last);
size_t using_size = pointer_diff(page_start, page_end);
PAL::template notify_using<NoZero>(page_start, using_size);
if constexpr (pal_supports<CoreDump, PAL>)
{
PAL::notify_do_dump(page_start, using_size);
}
}

constexpr FlatPagemap() = default;
Expand Down Expand Up @@ -192,6 +196,12 @@ namespace snmalloc

auto new_body_untyped = PAL::reserve(request_size);

if constexpr (pal_supports<CoreDump, PAL>)
{
// Pagemap should not be in core dump except where it is non-zero.
PAL::notify_do_not_dump(new_body_untyped, request_size);
}

if (new_body_untyped == nullptr)
{
PAL::error("Failed to initialise snmalloc.");
Expand Down
6 changes: 6 additions & 0 deletions src/snmalloc/pal/pal_consts.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ namespace snmalloc
* This Pal provides a millisecond time source
*/
Time = (1 << 5),

/**
* This Pal supports selective core dumps, so callers can
* modify which parts of memory get dumped.
*/
CoreDump = (1 << 6),
};

/**
Expand Down
31 changes: 10 additions & 21 deletions src/snmalloc/pal/pal_freebsd.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ namespace snmalloc
* field is declared explicitly to remind anyone modifying this class to
* add new features that they should add any required feature flags.
*/
static constexpr uint64_t pal_features = PALBSD_Aligned::pal_features;
static constexpr uint64_t pal_features =
PALBSD_Aligned::pal_features | CoreDump;

/**
* FreeBSD uses atypically small address spaces on its 64 bit RISC machines.
Expand All @@ -68,10 +69,8 @@ namespace snmalloc
/**
* Notify platform that we will not be using these pages.
*
* We use the `MADV_FREE` and `NADV_NOCORE` flags to `madvise`. The first
* allows the system to discard the page and replace it with a CoW mapping
* of the zero page. The second prevents this mapping from appearing in
* core files.
* We use the `MADV_FREE` flag to `madvise`. This allows the system to
* discard the page and replace it with a CoW mapping of the zero page.
*/
static void notify_not_using(void* p, size_t size) noexcept
{
Expand All @@ -80,7 +79,6 @@ namespace snmalloc
if constexpr (DEBUG)
memset(p, 0x5a, size);

madvise(p, size, MADV_NOCORE);
madvise(p, size, MADV_FREE);

if constexpr (mitigations(pal_enforce_access))
Expand All @@ -90,28 +88,19 @@ namespace snmalloc
}

/**
* Notify platform that we will be using these pages for reading.
*
* This is used only for pages full of zeroes and so we exclude them from
* core dumps.
* Notify platform that these pages should be included in a core dump.
*/
static void notify_using_readonly(void* p, size_t size) noexcept
static void notify_do_dump(void* p, size_t size) noexcept
{
PALBSD_Aligned<PALFreeBSD>::notify_using_readonly(p, size);
madvise(p, size, MADV_NOCORE);
madvise(p, size, MADV_CORE);
}

/**
* Notify platform that we will be using these pages.
*
* We may have previously marked this memory as not being included in core
* files, so mark it for inclusion again.
* Notify platform that these pages should not be included in a core dump.
*/
template<ZeroMem zero_mem>
static void notify_using(void* p, size_t size) noexcept
static void notify_do_not_dump(void* p, size_t size) noexcept
{
PALBSD_Aligned<PALFreeBSD>::notify_using<zero_mem>(p, size);
madvise(p, size, MADV_CORE);
madvise(p, size, MADV_NOCORE);
}

# if defined(__CHERI_PURE_CAPABILITY__)
Expand Down
23 changes: 8 additions & 15 deletions src/snmalloc/pal/pal_linux.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ namespace snmalloc
*
* We always make sure that linux has entropy support.
*/
static constexpr uint64_t pal_features = PALPOSIX::pal_features | Entropy;
static constexpr uint64_t pal_features =
PALPOSIX::pal_features | Entropy | CoreDump;

static constexpr size_t page_size =
Aal::aal_name == PowerPC ? 0x10000 : PALPOSIX::page_size;
Expand Down Expand Up @@ -58,7 +59,6 @@ namespace snmalloc
void* p = PALPOSIX<PALLinux>::reserve(size);
if (p)
{
madvise(p, size, MADV_DONTDUMP);
# ifdef SNMALLOC_PAGEID
# ifndef PR_SET_VMA
# define PR_SET_VMA 0x53564d41
Expand Down Expand Up @@ -125,7 +125,6 @@ namespace snmalloc
if constexpr (DEBUG)
memset(p, 0x5a, size);

madvise(p, size, MADV_DONTDUMP);
madvise(p, size, madvise_free_flags);

if constexpr (mitigations(pal_enforce_access))
Expand All @@ -135,25 +134,19 @@ namespace snmalloc
}

/**
* Notify platform that we will be using these pages for reading.
*
* This is used only for pages full of zeroes and so we exclude them from
* core dumps.
* Notify platform that these pages should be included in a core dump.
*/
static void notify_using_readonly(void* p, size_t size) noexcept
static void notify_do_dump(void* p, size_t size) noexcept
{
PALPOSIX<PALLinux>::notify_using_readonly(p, size);
madvise(p, size, MADV_DONTDUMP);
madvise(p, size, MADV_DODUMP);
}

/**
* Notify platform that we will be using these pages.
* Notify platform that these pages should not be included in a core dump.
*/
template<ZeroMem zero_mem>
static void notify_using(void* p, size_t size) noexcept
static void notify_do_not_dump(void* p, size_t size) noexcept
{
PALPOSIX<PALLinux>::notify_using<zero_mem>(p, size);
madvise(p, size, MADV_DODUMP);
madvise(p, size, MADV_DONTDUMP);
}

static uint64_t get_entropy64()
Expand Down

0 comments on commit 4620220

Please sign in to comment.