diff --git a/include/photon/thread/stack-allocator.h b/include/photon/thread/stack-allocator.h
new file mode 120000
index 00000000..63cf8f90
--- /dev/null
+++ b/include/photon/thread/stack-allocator.h
@@ -0,0 +1 @@
+../../../thread/stack-allocator.h
\ No newline at end of file
diff --git a/thread/stack-allocator.cpp b/thread/stack-allocator.cpp
new file mode 100644
index 00000000..a80bc48f
--- /dev/null
+++ b/thread/stack-allocator.cpp
@@ -0,0 +1,211 @@
+/*
+Copyright 2022 The Photon Authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// NOTE(review): the #include targets and the template parameter list were not
+// legible in the patch under review; the ones below are inferred from the
+// names this file uses -- confirm against the repository before merging.
+#if defined(__linux__)
+#include <linux/mman.h>
+#endif
+#include <errno.h>
+#include <photon/common/utility.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include <utility>
+#include <vector>
+
+namespace photon {
+
+// Pool of thread stacks bucketed by size. Blocks are obtained with
+// posix_memalign(); a released block whose size maps to one of the N_SLOTS
+// buckets is kept for reuse unless that would bring the pooled byte count up
+// to `trim_threshold`. The class takes no lock; this file only reaches it
+// through the thread_local instance in get_pooled_stack_allocator().
+template <size_t MIN_ALLOCATION_SIZE = 4UL * 1024,
+          size_t MAX_ALLOCATION_SIZE = 64UL * 1024 * 1024,
+          size_t ALIGNMENT = 64>
+class PooledStackAllocator {
+    constexpr static bool is_power2(size_t n) { return (n & (n - 1)) == 0; }
+    static_assert(is_power2(ALIGNMENT), "must be 2^n");
+    static_assert(is_power2(MAX_ALLOCATION_SIZE), "must be 2^n");
+    // get_slot() relies on this to make every doubling start a new bucket.
+    static_assert(MIN_ALLOCATION_SIZE > 1 && is_power2(MIN_ALLOCATION_SIZE),
+                  "must be 2^n, n > 0");
+    // Number of buckets; valid bucket indexes are [0, N_SLOTS).
+    const static size_t N_SLOTS =
+        __builtin_ffsl(MAX_ALLOCATION_SIZE / MIN_ALLOCATION_SIZE);
+
+public:
+    PooledStackAllocator() = default;
+    // The buckets own raw memory: a copy would free every block twice.
+    PooledStackAllocator(const PooledStackAllocator&) = delete;
+    PooledStackAllocator& operator=(const PooledStackAllocator&) = delete;
+
+protected:
+    // Sum of the sizes of all blocks currently stored in `slots`.
+    size_t in_pool_size = 0;
+    // Shared by all instances; see dealloc() and threshold().
+    static size_t trim_threshold;
+
+    // Returns `alloc_size` bytes aligned to ALIGNMENT, or nullptr with errno
+    // set to the posix_memalign() error code.
+    static void* __alloc(size_t alloc_size) {
+        void* ptr = nullptr;
+        int ret = ::posix_memalign(&ptr, ALIGNMENT, alloc_size);
+        if (ret != 0) {
+            errno = ret;
+            return nullptr;
+        }
+#if defined(__linux__)
+        // Advisory call, its result is not checked.
+        madvise(ptr, alloc_size, MADV_NOHUGEPAGE);
+#endif
+        return ptr;
+    }
+
+    // Releases a block obtained from __alloc(); `size` is its length in bytes.
+    static void __dealloc(void* ptr, size_t size) {
+        madvise(ptr, size, MADV_DONTNEED);
+        free(ptr);
+    }
+
+    // One bucket: an unordered bag of (address, size) blocks.
+    struct Slot {
+        std::vector<std::pair<void*, size_t>> pool;
+
+        ~Slot() {
+            for (const auto& blk : pool) {
+                __dealloc(blk.first, blk.second);
+            }
+        }
+        // Removes and returns a block of at least `min_size` bytes, or
+        // {nullptr, 0} if the bucket has none. A bucket covers a range of
+        // sizes, so the most recently added block may be too small for the
+        // request and must not be handed out blindly.
+        std::pair<void*, size_t> get(size_t min_size) {
+            for (auto it = pool.rbegin(); it != pool.rend(); ++it) {
+                if (it->second < min_size) continue;
+                auto ret = *it;
+                // Order is irrelevant: fill the hole with the last block.
+                *it = pool.back();
+                pool.pop_back();
+                return ret;
+            }
+            return {nullptr, 0};
+        }
+        void put(void* ptr, size_t size) { pool.emplace_back(ptr, size); }
+    };
+
+    // Maps a size to its bucket index: sizes up to MIN_ALLOCATION_SIZE share
+    // bucket 0 and every further doubling moves one bucket up. The result is
+    // >= N_SLOTS for sizes above MAX_ALLOCATION_SIZE, so callers must range
+    // check it before indexing `slots`.
+    static inline size_t get_slot(size_t length) {
+        // Also keeps 0 and 1 away from __builtin_clzl(length - 1), whose
+        // result is undefined for an argument of 0.
+        if (length <= MIN_ALLOCATION_SIZE) return 0;
+        constexpr int base = __builtin_clzl(MIN_ALLOCATION_SIZE - 1);
+        const int index = __builtin_clzl(length - 1);
+        return base > index ? static_cast<size_t>(base - index) : 0;
+    }
+
+    Slot slots[N_SLOTS];
+
+public:
+    // Returns a block of at least `size` bytes, reusing a pooled one when
+    // possible; nullptr (errno set) if a fresh allocation fails.
+    void* alloc(size_t size) {
+        auto idx = get_slot(size);
+        if (unlikely(idx >= N_SLOTS)) {
+            // larger than biggest slot
+            return __alloc(size);
+        }
+        auto blk = slots[idx].get(size);
+        if (unlikely(!blk.first)) {
+            // nothing suitable pooled in slots[idx]
+            return __alloc(size);
+        }
+        // got from pool
+        in_pool_size -= blk.second;
+        return blk.first;
+    }
+    // Gives back a block of `size` bytes previously returned by alloc().
+    // The block is pooled unless it is too large for any bucket or pooling
+    // it would reach `trim_threshold`; in those cases it is freed at once.
+    // Always returns 0.
+    int dealloc(void* ptr, size_t size) {
+        auto idx = get_slot(size);
+        if (unlikely(idx >= N_SLOTS ||
+                     (in_pool_size + size >= trim_threshold))) {
+            // big block or in-pool buffers reaches to threshold
+            __dealloc(ptr, size);
+            return 0;
+        }
+        // Collect into pool
+        in_pool_size += size;
+        slots[idx].put(ptr, size);
+        return 0;
+    }
+    // Frees pooled blocks, visiting the buckets round-robin, until at most
+    // `keep_size` bytes remain pooled. Returns the number of bytes freed.
+    size_t trim(size_t keep_size) {
+        size_t count = 0;
+        for (size_t i = 0; in_pool_size > keep_size; i = (i + 1) % N_SLOTS) {
+            auto& pool = slots[i].pool;
+            if (pool.empty()) continue;
+            auto blk = pool.back();
+            pool.pop_back();
+            in_pool_size -= blk.second;
+            count += blk.second;
+            __dealloc(blk.first, blk.second);
+        }
+        return count;
+    }
+    // Sets the limit shared by all instances and returns the new value.
+    size_t threshold(size_t x) {
+        trim_threshold = x;
+        return trim_threshold;
+    }
+};
+
+template <size_t MIN_ALLOCATION_SIZE, size_t MAX_ALLOCATION_SIZE,
+          size_t ALIGNMENT>
+size_t PooledStackAllocator<MIN_ALLOCATION_SIZE, MAX_ALLOCATION_SIZE,
+                            ALIGNMENT>::trim_threshold = 1024UL * 1024 * 1024;
+
+// One allocator per OS thread, created on first use.
+static PooledStackAllocator<>& get_pooled_stack_allocator() {
+    thread_local PooledStackAllocator<> _alloc;
+    return _alloc;
+}
+
+void* pooled_stack_alloc(void*, size_t stack_size) {
+    return get_pooled_stack_allocator().alloc(stack_size);
+}
+void pooled_stack_dealloc(void*, void* stack_ptr, size_t stack_size) {
+    get_pooled_stack_allocator().dealloc(stack_ptr, stack_size);
+}
+
+size_t pooled_stack_trim_current_vcpu(size_t keep_size) {
+    return get_pooled_stack_allocator().trim(keep_size);
+}
+
+size_t pooled_stack_trim_threshold(size_t x) {
+    return get_pooled_stack_allocator().threshold(x);
+}
+
+}  // namespace photon
diff --git a/thread/stack-allocator.h b/thread/stack-allocator.h
new file mode 100644
index 00000000..e79cf216
--- /dev/null
+++ b/thread/stack-allocator.h
@@ -0,0 +1,43 @@
+/*
+Copyright 2022 The Photon Authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#pragma once
+
+// NOTE(review): include targets were not legible in the patch under review;
+// these two are inferred from the names used below -- confirm.
+#include <photon/thread/thread.h>
+#include <stddef.h>
+
+namespace photon {
+// Stack allocator that keeps released stacks in a per-thread (thread_local)
+// pool, so no locking is needed. Both functions are passed to
+// set_photon_thread_stack_allocator() below; the leading `void*` is unused.
+void* pooled_stack_alloc(void*, size_t stack_size);
+void pooled_stack_dealloc(void*, void* stack_ptr, size_t stack_size);
+
+// Free pooled stacks of the current vcpu until at most `keep_size` bytes
+// remain in its pool. Returns the number of bytes freed.
+size_t pooled_stack_trim_current_vcpu(size_t keep_size);
+// Set the keep-in-pool limit in bytes (one value shared by all vcpus).
+// Returns the new limit.
+size_t pooled_stack_trim_threshold(size_t threshold);
+
+// Install the pooled allocator as the photon thread stack allocator.
+inline void use_pooled_stack_allocator() {
+    set_photon_thread_stack_allocator({&pooled_stack_alloc, nullptr},
+                                      {&pooled_stack_dealloc, nullptr});
+}
+}  // namespace photon
diff --git a/thread/test/CMakeLists.txt b/thread/test/CMakeLists.txt
index 44634a93..9311430e 100644
--- a/thread/test/CMakeLists.txt
+++ b/thread/test/CMakeLists.txt
@@ -34,4 +34,8 @@ add_test(NAME test-lib-data COMMAND $<TARGET_FILE:test-lib-data>)
 
 add_executable(test-multi-vcpu-locking test-multi-vcpu-locking.cpp)
 target_link_libraries(test-multi-vcpu-locking PRIVATE photon_static)
-add_test(NAME test-multi-vcpu-locking COMMAND $<TARGET_FILE:test-multi-vcpu-locking>)
\ No newline at end of file
+add_test(NAME test-multi-vcpu-locking COMMAND $<TARGET_FILE:test-multi-vcpu-locking>)
+
+add_executable(test-pooled-stack-allocator test-pooled-stack-allocator.cpp)
+target_link_libraries(test-pooled-stack-allocator PRIVATE photon_static)
+add_test(NAME test-pooled-stack-allocator COMMAND $<TARGET_FILE:test-pooled-stack-allocator>)
diff --git a/thread/test/test-pooled-stack-allocator.cpp b/thread/test/test-pooled-stack-allocator.cpp
new file mode 100644
index 00000000..86155233
--- /dev/null
+++ b/thread/test/test-pooled-stack-allocator.cpp
@@ -0,0 +1,87 @@
+/*
+Copyright 2022 The Photon Authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// NOTE(review): the #include targets were not legible in the patch under
+// review; the list below is inferred from the names this test uses -- confirm.
+#include <gtest/gtest.h>
+#include <photon/common/alog.h>
+#include <photon/photon.h>
+#include <photon/thread/stack-allocator.h>
+#include <photon/thread/thread-pool.h>
+#include <photon/thread/workerpool.h>
+
+// Number of tasks submitted by one do_test() run.
+static constexpr uint64_t N = 10000;
+
+// Submits N tasks to a photon::WorkPool constructed with `mode` as its last
+// argument, waits until every task has signalled the semaphore, and returns
+// the difference of photon::now taken before and after.
+uint64_t do_test(int mode) {
+    photon::WorkPool pool(4, photon::INIT_EVENT_DEFAULT, photon::INIT_IO_NONE,
+                          mode);
+    photon::semaphore sem(0);
+    auto start = photon::now;
+    // N is unsigned: use an unsigned index to avoid a mixed-sign comparison.
+    for (uint64_t i = 0; i < N; i++) {
+        pool.async_call(new auto([&] { sem.signal(1); }));
+    }
+    sem.wait(N);
+    auto done = photon::now;
+    return done - start;
+}
+
+// Allocator reset via set_photon_thread_stack_allocator(), WorkPool mode 0.
+TEST(Normal, NoPool) {
+    photon::set_photon_thread_stack_allocator();
+    photon::init();
+    DEFER(photon::fini());
+    auto spend = do_test(0);
+    LOG_TEMP("Spent ` us", spend);
+}
+
+// Allocator reset via set_photon_thread_stack_allocator(), WorkPool mode 64.
+TEST(Normal, ThreadPool) {
+    photon::set_photon_thread_stack_allocator();
+    photon::init();
+    DEFER(photon::fini());
+    auto spend = do_test(64);
+    LOG_TEMP("Spent ` us", spend);
+}
+
+// Pooled stack allocator, WorkPool mode 0.
+TEST(PooledAllocator, PooledStack) {
+    photon::use_pooled_stack_allocator();
+    photon::init();
+    DEFER(photon::fini());
+    auto spend = do_test(0);
+    LOG_TEMP("Spent ` us", spend);
+}
+
+// Pooled stack allocator with the thread pool bypassed, WorkPool mode 64.
+TEST(PooledAllocator, BypassThreadPool) {
+    photon::use_pooled_stack_allocator();
+    photon::set_bypass_threadpool();
+    photon::init();
+    DEFER(photon::fini());
+    auto spend = do_test(64);
+    LOG_TEMP("Spent ` us", spend);
+}
+
+int main(int argc, char** arg) {
+    ::testing::InitGoogleTest(&argc, arg);
+    set_log_output_level(ALOG_WARN);
+    return RUN_ALL_TESTS();
+}
diff --git a/thread/thread-pool.cpp b/thread/thread-pool.cpp
index 64a3e511..7d6fac43 100644
--- a/thread/thread-pool.cpp
+++ b/thread/thread-pool.cpp
@@ -20,8 +20,36 @@ limitations under the License.
namespace photon { + static bool __bypass_threadpool = false; + + bool __should_bypass_threadpool() { + return __bypass_threadpool; + } + + void set_bypass_threadpool(bool flag) { + __bypass_threadpool = true; + } + TPControl* ThreadPoolBase::thread_create_ex(thread_entry start, void* arg, bool joinable) { + if (m_capacity == 0 || __should_bypass_threadpool()) { + auto th = photon::thread_create(start, arg, (uint64_t)m_reserved, + sizeof(TPControl)); + auto pCtrl = photon::thread_reserved_space(th); + pCtrl->th = th; + /* Actually unused in `bypass_threadpool` condition + because all threadcreate work are forwarding to + `photon::thread_create` + + // pCtrl->joinable = joinable; + // pCtrl->start = start; + // pCtrl->arg = arg; + */ + if (joinable) { + photon::thread_enable_join(th); + } + return pCtrl; + } auto pCtrl = B::get(); { SCOPED_LOCK(pCtrl->m_mtx); @@ -101,6 +129,10 @@ namespace photon } void ThreadPoolBase::join(TPControl* pCtrl) { + if (m_capacity == 0 || __should_bypass_threadpool()) { + photon::thread_join((photon::join_handle*)pCtrl->th); + return; + } auto should_put = do_thread_join(pCtrl); if (should_put) pCtrl->pool->put(pCtrl); diff --git a/thread/thread-pool.h b/thread/thread-pool.h index 448f29c9..8d0433bb 100644 --- a/thread/thread-pool.h +++ b/thread/thread-pool.h @@ -96,6 +96,10 @@ namespace photon ThreadPoolBase::delete_thread_pool(p); } + // Use `photon::thread_create` directly for all thread-pool + // When using pooled-stack-allocator, or other high performance stack + // allocator + void set_bypass_threadpool(bool flag = true); template class ThreadPool : public ThreadPoolBase diff --git a/tools/export-header.py b/tools/export-header.py index 5bb6d3d4..fc8e69b1 100755 --- a/tools/export-header.py +++ b/tools/export-header.py @@ -25,12 +25,12 @@ sys.exit(-1) fn = os.path.abspath(fn) -if not '/photon/' in fn: +if not '/PhotonLibOS/' in fn: print('must be a header file of photon') sys.exit(-1) parts = fn.split('/') -i = 
parts.index('photon') + 1 +i = parts.index('PhotonLibOS') + 1 photon = '/'.join(parts[:i]) file = parts[i:] include = photon + '/include/photon/'