Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wasm buffer manager support #4523

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions extension/json/test/doc_examples.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
-DATASET CSV empty
-BUFFER_POOL_SIZE 134217728

--

Expand Down
1 change: 1 addition & 0 deletions extension/json/test/error.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
-DATASET CSV empty
-BUFFER_POOL_SIZE 268435456

--

Expand Down
2 changes: 1 addition & 1 deletion src/include/common/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ struct BufferPoolConstants {
#else
static constexpr uint64_t DEFAULT_VM_REGION_MAX_SIZE = static_cast<uint64_t>(1) << 43; // (8TB)
#endif
static constexpr uint64_t DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING = 1ull << 28; // (256MB)
static constexpr uint64_t DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING = 1ull << 26; // (64MB)
};

struct StorageConstants {
Expand Down
22 changes: 19 additions & 3 deletions src/include/storage/buffer_manager/buffer_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include <vector>

#include "common/types/types.h"
#include "storage/buffer_manager/memory_manager.h"
#include "storage/buffer_manager/page_state.h"
#include "storage/enums/page_read_policy.h"
#include "storage/file_handle.h"

Expand Down Expand Up @@ -222,10 +224,19 @@ class BufferManager {
// The function assumes that the requested page is already pinned.
void unpin(FileHandle& fileHandle, common::page_idx_t pageIdx);
uint8_t* getFrame(FileHandle& fileHandle, common::page_idx_t pageIdx) const {
#if BM_MALLOC
return fileHandle.getPageState(pageIdx)->getPage();
#else
return vmRegions[fileHandle.getPageSizeClass()]->getFrame(fileHandle.getFrameIdx(pageIdx));
#endif
}
common::frame_group_idx_t addNewFrameGroup(common::PageSizeClass pageSizeClass) {
common::frame_group_idx_t addNewFrameGroup(
common::PageSizeClass pageSizeClass [[maybe_unused]]) {
#if BM_MALLOC
return 0;
#else
return vmRegions[pageSizeClass]->addNewFrameGroup();
#endif
}
void removePageFromFrameIfNecessary(FileHandle& fileHandle, common::page_idx_t pageIdx);

Expand All @@ -242,8 +253,13 @@ class BufferManager {

uint64_t freeUsedMemory(uint64_t size);

void releaseFrameForPage(FileHandle& fileHandle, common::page_idx_t pageIdx) {
void releaseFrameForPage(FileHandle& fileHandle [[maybe_unused]],
common::page_idx_t pageIdx [[maybe_unused]]) {
#if BM_MALLOC
// Page is freed instead in PageState::resetToEvicted
#else
vmRegions[fileHandle.getPageSizeClass()]->releaseFrame(fileHandle.getFrameIdx(pageIdx));
#endif
}

uint64_t evictPages();
Expand All @@ -257,7 +273,7 @@ class BufferManager {
std::atomic<uint64_t> nonEvictableMemory;
// Each VMRegion corresponds to a virtual memory region of a specific page size. Currently, we
// hold two sizes of REGULAR_PAGE and TEMP_PAGE.
std::vector<std::unique_ptr<VMRegion>> vmRegions;
std::array<std::unique_ptr<VMRegion>, 2> vmRegions;
std::vector<std::unique_ptr<FileHandle>> fileHandles;
std::unique_ptr<Spiller> spiller;
common::VirtualFileSystem* vfs;
Expand Down
25 changes: 24 additions & 1 deletion src/include/storage/buffer_manager/page_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@

#include "common/assert.h"

// Alternative variant of the buffer manager which doesn't rely on MADV_DONTNEED (on Unix) for
// evicting pages (which is unavailable in Webassembly runtimes)
#define BM_MALLOC __WASM__
#if BM_MALLOC
#include <memory>
#endif

namespace kuzu {
namespace storage {

Expand Down Expand Up @@ -74,12 +81,28 @@ class PageState {
bool isDirty() const { return stateAndVersion & DIRTY_MASK; }
uint64_t getStateAndVersion() const { return stateAndVersion.load(); }

void resetToEvicted() { stateAndVersion.store(EVICTED << NUM_BITS_TO_SHIFT_FOR_STATE); }
void resetToEvicted() {
stateAndVersion.store(EVICTED << NUM_BITS_TO_SHIFT_FOR_STATE);
#if BM_MALLOC
page.reset();
#endif
}

#if BM_MALLOC
uint8_t* getPage() const { return page.get(); }
uint8_t* allocatePage(uint64_t pageSize) {
page = std::make_unique<uint8_t[]>(pageSize);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should check allocation failure here to avoid seg faults.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interestingly in webassembly the tests are compiled with the default ABORTING_MALLOC=1, which aborts the webassembly process if it runs out of memory instead of returning 0, but outside of the tests we're compiling with ALLOW_MEMORY_GROWTH, which has the default of having malloc return 0 on failure.
So I think we should set ABORTING_MALLOC=0 for the tests just so it's handled the same (not that we have any tests where we expect this yet).

return page.get();
}
#endif

private:
// Highest 1 bit is dirty bit, and the rest are page state and version bits.
// In the rest bits, the lowest 1 byte is state, and the rest are version.
std::atomic<uint64_t> stateAndVersion;
#if BM_MALLOC
std::unique_ptr<uint8_t[]> page;
#endif
};

} // namespace storage
Expand Down
34 changes: 25 additions & 9 deletions src/storage/buffer_manager/buffer_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,10 @@ BufferManager::BufferManager(const std::string& databasePath, const std::string&
: bufferPoolSize{bufferPoolSize}, evictionQueue{bufferPoolSize / KUZU_PAGE_SIZE},
usedMemory{evictionQueue.getCapacity() * sizeof(EvictionCandidate)}, vfs{vfs} {
verifySizeParams(bufferPoolSize, maxDBSize);
vmRegions.resize(2);
#if !BM_MALLOC
vmRegions[0] = std::make_unique<VMRegion>(REGULAR_PAGE, maxDBSize);
vmRegions[1] = std::make_unique<VMRegion>(TEMP_PAGE, bufferPoolSize);
#endif

// TODO(bmwinger): It may be better to spill to disk in a different location for remote file
// systems, or even in general.
Expand Down Expand Up @@ -141,7 +142,12 @@ uint8_t* BufferManager::pin(FileHandle& fileHandle, page_idx_t pageIdx,
throw BufferManagerException(
"Eviction queue is full! This should be impossible.");
}
#if BM_MALLOC
KU_ASSERT(pageState->getPage());
return pageState->getPage();
#else
return getFrame(fileHandle, pageIdx);
#endif
}
} break;
case PageState::UNLOCKED:
Expand Down Expand Up @@ -193,12 +199,19 @@ void handleAccessViolation(unsigned int exceptionCode, PEXCEPTION_POINTERS excep

// Returns true if the function completes successfully
inline bool try_func(const std::function<void(uint8_t*)>& func, uint8_t* frame,
const std::vector<std::unique_ptr<VMRegion>>& vmRegions, PageSizeClass pageSizeClass) {
#if defined(_WIN32)
const std::array<std::unique_ptr<VMRegion>, 2>& vmRegions [[maybe_unused]],
PageSizeClass pageSizeClass [[maybe_unused]]) {
#if BM_MALLOC
if (frame == nullptr) {
return false;
}
#endif

#if defined(_WIN32) && !BM_MALLOC
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The macro here is becoming more a bit hard to follow now. I wonder if we should choose to duplicate the code a bit to separate them more clearly.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the second #if/#else removed is it sufficiently clear?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep 👍

try {
#endif
func(frame);
#if defined(_WIN32)
#if defined(_WIN32) && !BM_MALLOC
} catch (AccessViolation& exc) {
// If we encounter an acess violation within the VM region,
// the page was decomitted by another thread
Expand All @@ -209,9 +222,6 @@ inline bool try_func(const std::function<void(uint8_t*)>& func, uint8_t* frame,
throw EXCEPTION_ACCESS_VIOLATION;
}
}
#else
(void)pageSizeClass;
(void)vmRegions;
#endif
return true;
}
Expand Down Expand Up @@ -309,8 +319,7 @@ bool BufferManager::claimAFrame(FileHandle& fileHandle, page_idx_t pageIdx,
if (!reserve(pageSizeToClaim)) {
return false;
}
#ifdef _WIN32
// We need to commit memory explicitly on Windows.
#if _WIN32 && !BM_MALLOC
// Committing in this context means reserving physical memory/page file space for a segment of
// virtual memory. On Linux/Unix this is automatic when you write to the memory address.
auto result =
Expand Down Expand Up @@ -408,9 +417,16 @@ void BufferManager::cachePageIntoFrame(FileHandle& fileHandle, page_idx_t pageId
PageReadPolicy pageReadPolicy) {
auto pageState = fileHandle.getPageState(pageIdx);
pageState->clearDirty();
#if BM_MALLOC
pageState->allocatePage(fileHandle.getPageSize());
if (pageReadPolicy == PageReadPolicy::READ_PAGE) {
fileHandle.readPageFromDisk(pageState->getPage(), pageIdx);
}
#else
if (pageReadPolicy == PageReadPolicy::READ_PAGE) {
fileHandle.readPageFromDisk(getFrame(fileHandle, pageIdx), pageIdx);
}
#endif
}

void BufferManager::removeFilePagesFromFrames(FileHandle& fileHandle) {
Expand Down
1 change: 1 addition & 0 deletions test/test_files/copy/copy_after_error.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
-DATASET CSV empty
-BUFFER_POOL_SIZE 268435456
--

-CASE CopyNodeAfterError
Expand Down
2 changes: 2 additions & 0 deletions test/test_files/dml_rel/create/create_ldbc_sf01.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
-DATASET CSV ldbc-sf01
-BUFFER_POOL_SIZE 134217728

--

-CASE CreateLikeComment
Expand Down
1 change: 1 addition & 0 deletions test/test_files/dml_rel/delete/delete_ldbc_sf01.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
-DATASET CSV ldbc-sf01
-BUFFER_POOL_SIZE 134217728
--

-CASE DeleteLikeComment1
Expand Down
1 change: 1 addition & 0 deletions test/test_files/dml_rel/set/set_ldbc_sf01.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
-DATASET CSV ldbc-sf01
-BUFFER_POOL_SIZE 134217728
--

-CASE SetLikeComment
Expand Down
3 changes: 2 additions & 1 deletion test/test_files/issue/issue5.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
-DATASET CSV ldbc-sf01
-BUFFER_POOL_SIZE 134217728

--

Expand All @@ -18,4 +19,4 @@
---- ok
-STATEMENT MATCH (a:Person)-[r:knows_with_null]->(b:Person) WHERE r.__EXTRAID IS NOT NULL RETURN COUNT(*)
---- 1
0
0
1 change: 1 addition & 0 deletions test/test_files/tck/match/match7.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
-DATASET CSV tck
-BUFFER_POOL_SIZE 268435456
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this due to recursive joins?


--

Expand Down