From 169cd22ef913816d4070917098dc471dfdcb522d Mon Sep 17 00:00:00 2001
From: Tapish
Date: Fri, 5 Apr 2024 15:32:26 +0200
Subject: [PATCH] cuda fix and fix waker_id bug

---
 examples/cuda_mandelbrot.cu                   | 152 +++++-----
 redGrapes/TaskCtx.hpp                         |   2 +-
 ...roperties.hpp => cuda_task_properties.hpp} |  12 +-
 redGrapes/dispatch/cuda/cuda_worker.hpp       | 267 ++++++++++++++++++
 redGrapes/dispatch/cuda/event_pool.hpp        |  10 +-
 redGrapes/dispatch/cuda/scheduler.hpp         | 194 -------------
 redGrapes/resource/access/io.hpp              |   3 +-
 redGrapes/scheduler/cuda_thread_scheduler.hpp |  73 +++++
 redGrapes/scheduler/pool_scheduler.tpp        |   7 +-
 redGrapesConfig.cmake                         |   2 +-
 10 files changed, 440 insertions(+), 282 deletions(-)
 rename redGrapes/dispatch/cuda/{task_properties.hpp => cuda_task_properties.hpp} (76%)
 create mode 100644 redGrapes/dispatch/cuda/cuda_worker.hpp
 delete mode 100644 redGrapes/dispatch/cuda/scheduler.hpp
 create mode 100644 redGrapes/scheduler/cuda_thread_scheduler.hpp

diff --git a/examples/cuda_mandelbrot.cu b/examples/cuda_mandelbrot.cu
index e4f78065..8ae9cb68 100644
--- a/examples/cuda_mandelbrot.cu
+++ b/examples/cuda_mandelbrot.cu
@@ -5,7 +5,18 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
  */
 
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
 #include
+#include
 #include
 #include
@@ -13,23 +24,6 @@
 #include
 #include
 
-enum SchedulerTag
-{
-    SCHED_CUDA
-};
-
-#define REDGRAPES_TASK_PROPERTIES dispatch::cuda::CudaTaskProperties, scheduler::SchedulingTagProperties
-
-#include "redGrapes/dispatch/cuda/scheduler.hpp"
-#include
-#include "redGrapes/resource/fieldresource.hpp"
-#include "redGrapes/resource/ioresource.hpp"
-#include "redGrapes/scheduler/pool_scheduler.hpp"
-#include "redGrapes/scheduler/tag_match.hpp"
-#include "redGrapes/task/property/resource.hpp"
-
-namespace rg = redGrapes;
-
 struct Color
 {
     float r, g, b;
@@ -70,18 +64,27 @@ __global__ void mandelbrot(
     out[index] = Color{cosf(float(i) / 7.0), cosf(2.0 + float(i) / 11.0), cosf(4.0 + float(i) / 13.0)};
 }
 
-int main()
+struct CudaTag
 {
-    auto pool_scheduler = std::make_shared<rg::scheduler::PoolScheduler>(4 /* number of CPU workers */);
+};
+
+using RGTask = redGrapes::Task;
 
-    auto cuda_scheduler = std::make_shared<rg::dispatch::cuda::CudaScheduler>(
-        [](rg::Task const& t) { return t.required_scheduler_tags.test(SCHED_CUDA); },
-        4 /* number of cuda streams */
-    );
+int main()
+{
+    spdlog::set_level(spdlog::level::trace);
+    spdlog::set_pattern("[thread %t] %^[%l]%$ %v");
 
-    rg::idle = [cuda_scheduler] { cuda_scheduler->poll(); };
+    auto rg = redGrapes::init(
+        redGrapes::SchedulerDescription(
+            std::make_shared<redGrapes::scheduler::CudaThreadScheduler<RGTask>>(),
+            CudaTag{}),
+        redGrapes::SchedulerDescription(
+            std::make_shared<redGrapes::scheduler::PoolScheduler<redGrapes::dispatch::thread::DefaultWorker<RGTask>>>(
+                4),
+            redGrapes::DefaultTag{}));
 
-    rg::init(rg::scheduler::make_tag_match_scheduler().add({}, pool_scheduler).add({SCHED_CUDA}, cuda_scheduler));
+    auto& cudaSched = rg.getScheduler<CudaTag>();
 
     double mid_x = 0.41820187155955555;
     double mid_y = 0.32743154895555555;
@@ -90,10 +93,10 @@ int main()
     size_t height = 4096;
     size_t area = width * height;
 
-    rg::IOResource<Color*> host_buffer;
-    rg::IOResource<Color*> device_buffer;
+    redGrapes::IOResource<Color*> host_buffer;
+    redGrapes::IOResource<Color*> device_buffer;
 
-    rg::emplace_task(
+    rg.emplace_task(
         [area](auto host_buffer)
         {
             void* ptr;
@@ -102,7 +105,7 @@ int main()
         },
         host_buffer.write());
 
-    rg::emplace_task(
+    rg.emplace_task(
         [area](auto device_buffer)
         {
             void* ptr;
@@ -124,52 +127,55 @@ int main()
         /*
         * calculate picture
         */
-        rg::emplace_task(
-            [width, height, area, i, mid_x, mid_y, w](auto device_buffer)
-            {
-                double begin_x = mid_x - w;
-                double end_x = mid_x + w;
-                double begin_y = mid_y - w;
-                double end_y = mid_y + w;
-
-                dim3 threadsPerBlock(8, 8);
-                dim3 numBlocks(width / threadsPerBlock.x, height / threadsPerBlock.y);
-
-                mandelbrot<<<numBlocks, threadsPerBlock, 0, rg::dispatch::cuda::current_stream>>>(
-                    begin_x,
-                    end_x,
-                    begin_y,
-                    end_y,
-                    width,
-                    height,
-                    *device_buffer);
-                std::cout << "launched kernel to stream " << rg::dispatch::cuda::current_stream << std::endl;
-            },
-            rg::TaskProperties::Builder().scheduling_tags({SCHED_CUDA}),
-            device_buffer.write());
+        rg.emplace_task(
+            [width, height, area, i, mid_x, mid_y, w, &cudaSched](auto device_buffer)
+            {
+                double begin_x = mid_x - w;
+                double end_x = mid_x + w;
+                double begin_y = mid_y - w;
+                double end_y = mid_y + w;
+
+                dim3 threadsPerBlock(8, 8);
+                dim3 numBlocks(width / threadsPerBlock.x, height / threadsPerBlock.y);
+
+                auto current_stream = cudaSched.getCudaStream(0);
+                mandelbrot<<<numBlocks, threadsPerBlock, 0, current_stream>>>(
+                    begin_x,
+                    end_x,
+                    begin_y,
+                    end_y,
+                    width,
+                    height,
+                    *device_buffer);
+                std::cout << "launched kernel to stream " << current_stream << std::endl;
+            },
+            device_buffer.write())
+            .cuda_stream_index(0u);
 
         /*
         * copy data
         */
-        rg::emplace_task(
-            [area](auto host_buffer, auto device_buffer)
-            {
-                cudaMemcpyAsync(
-                    *host_buffer,
-                    *device_buffer,
-                    area * sizeof(Color),
-                    cudaMemcpyDeviceToHost,
-                    rg::dispatch::cuda::current_stream);
-                std::cout << "launched memcpy to stream " << rg::dispatch::cuda::current_stream << std::endl;
-            },
-            rg::TaskProperties::Builder().scheduling_tags({SCHED_CUDA}),
-            host_buffer.write(),
-            device_buffer.read());
+        rg.emplace_task(
+            [area, &cudaSched](auto host_buffer, auto device_buffer)
+            {
+                auto current_stream = cudaSched.getCudaStream(0);
+                cudaMemcpyAsync(
+                    *host_buffer,
+                    *device_buffer,
+                    area * sizeof(Color),
+                    cudaMemcpyDeviceToHost,
+                    current_stream);
+                std::cout << "launched memcpy to stream " << current_stream << std::endl;
+            },
+            host_buffer.write(),
+            device_buffer.read())
+            .cuda_stream_index(0u);
 
         /*
         * write png
         */
-        rg::emplace_task(
+        rg.emplace_task(
            [width, height, i](auto host_buffer)
            {
                std::stringstream step;
@@ -179,9 +185,9 @@ int main()
                pngwriter png(width, height, 0, filename.c_str());
                png.setcompressionlevel(9);
 
-                for(int y = 0; y < height; ++y)
+                for(size_t y = 0; y < height; ++y)
                {
-                    for(int x = 0; x < width; ++x)
+                    for(size_t x = 0; x < width; ++x)
                    {
                        auto& color = (*host_buffer)[x + y * width];
                        png.plot(x + 1, height - y, color.r, color.g, color.b);
@@ -194,7 +200,7 @@ int main()
            host_buffer.read());
    }
 
-    rg::emplace_task([](auto b) {}, host_buffer.write()).get();
+    rg.emplace_task([]([[maybe_unused]] auto b) {}, host_buffer.write()).get();
 
     auto t2 = std::chrono::high_resolution_clock::now();
     std::cout << "runtime: " << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count() << " μs"
@@ -203,7 +209,9 @@ int main()
 
     /*
     * cleanup
     */
-    rg::emplace_task([](auto host_buffer) { cudaFreeHost(*host_buffer); }, host_buffer.write());
+    rg.emplace_task([](auto host_buffer) { cudaFreeHost(*host_buffer); }, host_buffer.write())
+        .cuda_stream_index(0u);
 
-    rg::emplace_task([](auto device_buffer) { cudaFree(*device_buffer); }, device_buffer.write());
+    rg.emplace_task([](auto device_buffer) { cudaFree(*device_buffer); }, device_buffer.write())
+        .cuda_stream_index(0u);
 }
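The example above pins all CUDA work to stream 0. The same API admits spreading the per-frame kernels over several streams; a hypothetical variation of the kernel task, assuming the CudaThreadScheduler is constructed with num_streams > 1 (its constructor argument defaults to 1):

    unsigned const num_streams = 4; // must match the CudaThreadScheduler constructor argument
    unsigned const stream_idx = i % num_streams;
    rg.emplace_task(
        [width, height, mid_x, mid_y, w, stream_idx, &cudaSched](auto device_buffer)
        {
            dim3 threadsPerBlock(8, 8);
            dim3 numBlocks(width / threadsPerBlock.x, height / threadsPerBlock.y);
            auto stream = cudaSched.getCudaStream(stream_idx);
            mandelbrot<<<numBlocks, threadsPerBlock, 0, stream>>>(
                mid_x - w, mid_x + w, mid_y - w, mid_y + w, width, height, *device_buffer);
        },
        device_buffer.write())
        .cuda_stream_index(stream_idx); // tells the CudaWorker which stream the task submits to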
diff --git a/redGrapes/TaskCtx.hpp b/redGrapes/TaskCtx.hpp
index 8c41ca97..42e5a34c 100644
--- a/redGrapes/TaskCtx.hpp
+++ b/redGrapes/TaskCtx.hpp
@@ -32,7 +32,7 @@ namespace redGrapes
             }
             else
             {
-                event->waker_id = event.task->scheduler_p->getNextWorkerID();
+                event->waker_id = event.task->scheduler_p->getNextWorkerID() + 1;
                 while(!event->is_reached())
                     TaskFreeCtx::idle();
             }
diff --git a/redGrapes/dispatch/cuda/task_properties.hpp b/redGrapes/dispatch/cuda/cuda_task_properties.hpp
similarity index 76%
rename from redGrapes/dispatch/cuda/task_properties.hpp
rename to redGrapes/dispatch/cuda/cuda_task_properties.hpp
index e8532fc0..f20634e6 100644
--- a/redGrapes/dispatch/cuda/task_properties.hpp
+++ b/redGrapes/dispatch/cuda/cuda_task_properties.hpp
@@ -1,4 +1,4 @@
-/* Copyright 2020 Michael Sippel
+/* Copyright 2024 Tapish Narwal
  *
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -7,6 +7,8 @@
 
 #pragma once
 
+#include
+
 namespace redGrapes
 {
     namespace dispatch
@@ -16,7 +18,7 @@ namespace redGrapes
 
         struct CudaTaskProperties
         {
-            std::optional<cudaEvent_t> cuda_event;
+            std::optional<unsigned> m_cuda_stream_idx;
 
             CudaTaskProperties()
             {
@@ -30,6 +32,12 @@ namespace redGrapes
                 Builder(PropertiesBuilder& b) : builder(b)
                 {
                 }
+
+                PropertiesBuilder& cuda_stream_index(unsigned cuda_stream_idx)
+                {
+                    builder.task->m_cuda_stream_idx = cuda_stream_idx;
+                    return builder;
+                }
             };
 
             struct Patch
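The builder assigns to m_cuda_stream_idx instead of writing through operator*: the optional is disengaged until the property is set, and dereferencing an empty std::optional is undefined behavior. A minimal illustration, independent of redGrapes:

    #include <optional>

    int main()
    {
        std::optional<unsigned> idx; // disengaged, holds no value yet
        // *idx = 3;                 // undefined behavior: dereferences an empty optional
        idx = 3;                     // ok: engages the optional and stores 3
        return idx.has_value() ? 0 : 1;
    }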
diff --git a/redGrapes/dispatch/cuda/cuda_worker.hpp b/redGrapes/dispatch/cuda/cuda_worker.hpp
new file mode 100644
index 00000000..ee861848
--- /dev/null
+++ b/redGrapes/dispatch/cuda/cuda_worker.hpp
@@ -0,0 +1,267 @@
+/* Copyright 2024 Tapish Narwal
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include "redGrapes/TaskCtx.hpp"
+#include "redGrapes/dispatch/cuda/cuda_task_properties.hpp"
+#include "redGrapes/dispatch/cuda/event_pool.hpp"
+#include "redGrapes/scheduler/event.hpp"
+#include "redGrapes/sync/cv.hpp"
+#include "redGrapes/task/queue.hpp"
+
+#include
+#include
+#include
+
+#include
+
+namespace redGrapes::dispatch::cuda
+{
+    struct CudaStreamWrapper
+    {
+        cudaStream_t cuda_stream;
+
+        CudaStreamWrapper()
+        {
+            cudaStreamCreate(&cuda_stream);
+        }
+
+        CudaStreamWrapper(CudaStreamWrapper const&)
+        {
+            spdlog::warn("CudaStreamWrapper copy constructor called!");
+        }
+
+        ~CudaStreamWrapper()
+        {
+            cudaStreamDestroy(cuda_stream);
+        }
+    };
+
+    // this class is not thread safe
+    // stream dispatcher
+    template<typename TTask>
+    struct CudaWorker
+    {
+        using task_type = TTask;
+
+        WorkerId id;
+        std::vector<CudaStreamWrapper> streams;
+        EventPool event_pool;
+
+        /*! if true, the thread shall stop
+         * instead of waiting when it is out of jobs
+         */
+        std::atomic_bool m_stop{false};
+        std::atomic<unsigned> task_count{0};
+
+        std::queue<std::pair<cudaEvent_t, scheduler::EventPtr<TTask>>> events;
+        std::recursive_mutex mutex;
+
+        //! condition variable for waiting if the queue is empty
+        CondVar cv;
+
+        static constexpr size_t queue_capacity = 128;
+        task::Queue<TTask> emplacement_queue{queue_capacity};
+        task::Queue<TTask> ready_queue{queue_capacity};
+
+        CudaWorker(WorkerId worker_id) : id(worker_id)
+        {
+        }
+
+        CudaWorker(WorkerId worker_id, unsigned num_streams) : id{worker_id}, streams{num_streams}
+        {
+        }
+
+        inline scheduler::WakerId get_waker_id()
+        {
+            return id + 1;
+        }
+
+        inline bool wake()
+        {
+            return cv.notify();
+        }
+
+        void stop()
+        {
+            SPDLOG_TRACE("Worker::stop()");
+            m_stop.store(true, std::memory_order_release);
+            wake();
+        }
+
+        /* adds a new task to the emplacement queue
+         * and wakes up the thread to kickstart execution
+         */
+        inline void dispatch_task(TTask& task)
+        {
+            emplacement_queue.push(&task);
+            wake();
+        }
+
+        inline void execute_task(TTask& task)
+        {
+            TRACE_EVENT("Worker", "dispatch task");
+
+            SPDLOG_DEBUG("cuda thread dispatch: execute task {}", task.task_id);
+            assert(task.is_ready());
+            std::lock_guard lock(mutex);
+
+            TaskCtx<TTask>::current_task = &task;
+
+            // run the code that calls the CUDA API and submits work to streams[*task.m_cuda_stream_idx]
+            auto event = task();
+
+            cudaEvent_t cuda_event = event_pool.alloc();
+            cudaEventRecord(cuda_event, streams[*(task.m_cuda_stream_idx)].cuda_stream);
+            auto my_event = TaskCtx<TTask>::create_event();
+            events.push(std::make_pair(cuda_event, *my_event));
+            SPDLOG_TRACE(
+                "CudaStreamDispatcher {}: recorded event {}",
+                streams[*(task.m_cuda_stream_idx)].cuda_stream,
+                cuda_event);
+
+            // TODO figure out the correct position for this
+            task.get_pre_event().notify();
+
+            if(event)
+            {
+                event->get_event().waker_id = get_waker_id();
+                task.sg_pause(*event);
+
+                task.pre_event.up();
+                task.get_pre_event().notify();
+            }
+            else
+                task.get_post_event().notify();
+
+            TaskCtx<TTask>::current_task = nullptr;
+        }
+
+        /* repeatedly try to find and execute tasks
+         * until the stop flag is set by stop()
+         */
+        void work_loop()
+        {
+            SPDLOG_TRACE("Worker {} start work_loop()", this->id);
+            while(!this->m_stop.load(std::memory_order_consume))
+            {
+                // this->cv.wait(); // TODO fix this by fixing event_ptr notify to wake
+
+                while(TTask* task = this->gather_task())
+                {
+                    execute_task(*task);
+                    poll(); // TODO fix where to poll
+                }
+                poll();
+            }
+            SPDLOG_TRACE("Worker {} end work_loop()", this->id);
+        }
+
+        /* find a task that shall be executed next
+         */
+        TTask* gather_task()
+        {
+            TRACE_EVENT("Worker", "gather_task()");
+            TTask* task = nullptr;
+
+            /* STAGE 1:
+             *
+             * first, execute all tasks in the ready queue
+             */
+            SPDLOG_TRACE("Worker {}: consume ready queue", id);
+            if((task = ready_queue.pop()))
+                return task;
+
+            /* STAGE 2:
+             *
+             * after the ready queue is fully consumed,
+             * try initializing new tasks until one
+             * of them is found to be ready
+             */
+            SPDLOG_TRACE("Worker {}: try init new tasks", id);
+            while(this->init_dependencies(task, true))
+                if(task)
+                    return task;
+
+            return task;
+        }
+        /*! take a task from the emplacement queue and initialize it
+         * @param t set to the task if the new task is ready,
+         *          set to nullptr if the new task is blocked
+         * @param claimed if true, the new task will not be activated here;
+         *        if false, activate_task will be called by notify_event
+         *
+         * @return false if the emplacement queue is empty
+         */
+        bool init_dependencies(TTask*& t, bool claimed = true)
+        {
+            TRACE_EVENT("Worker", "init_dependencies()");
+            if(TTask* task = emplacement_queue.pop())
+            {
+                SPDLOG_DEBUG("init task {}", task->task_id);
+
+                task->pre_event.up();
+                task->init_graph();
+
+                if(task->get_pre_event().notify(claimed))
+                    t = task;
+                else
+                    t = nullptr;
+
+                return true;
+            }
+            else
+                return false;
+        }
+
+        //! checks if some cuda calls finished and notifies the redGrapes manager
+        void poll()
+        {
+            std::lock_guard lock(mutex);
+            if(!events.empty())
+            {
+                auto& cuda_event = events.front().first;
+                auto& event = events.front().second;
+
+                if(cudaEventQuery(cuda_event) == cudaSuccess)
+                {
+                    SPDLOG_TRACE("cuda event {} ready", cuda_event);
+                    event_pool.free(cuda_event);
+                    event.notify();
+
+                    events.pop();
+                }
+            }
+        }
+    };
+
+} // namespace redGrapes::dispatch::cuda
+
+template<>
+struct fmt::formatter<redGrapes::dispatch::cuda::CudaTaskProperties>
+{
+    constexpr auto parse(format_parse_context& ctx)
+    {
+        return ctx.begin();
+    }
+
+    template<typename FormatContext>
+    auto format(redGrapes::dispatch::cuda::CudaTaskProperties const& prop, FormatContext& ctx)
+    {
+        if(auto idx = prop.m_cuda_stream_idx)
+            return fmt::format_to(ctx.out(), "\"cuda_stream_idx\" : {}", *idx);
+        else
+            return fmt::format_to(ctx.out(), "\"cuda_stream_idx\" : null");
+    }
+};
diff --git a/redGrapes/dispatch/cuda/event_pool.hpp b/redGrapes/dispatch/cuda/event_pool.hpp
index a8123117..87aabec1 100644
--- a/redGrapes/dispatch/cuda/event_pool.hpp
+++ b/redGrapes/dispatch/cuda/event_pool.hpp
@@ -1,4 +1,4 @@
-/* Copyright 2020 Michael Sippel
+/* Copyright 2020-2024 Michael Sippel, Tapish Narwal
  *
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -7,6 +7,8 @@
 
 #pragma once
 
+#include
+
 #include
 #include
 
@@ -28,12 +30,6 @@ namespace redGrapes
         {
         }
 
-        static EventPool& get()
-        {
-            static EventPool singleton;
-            return singleton;
-        }
-
         ~EventPool()
         {
             std::lock_guard lock(mutex);
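CudaWorker never blocks on the CUDA runtime: execute_task records a cudaEvent_t after the task has submitted its asynchronous work, and poll later notifies the task's post-event once cudaEventQuery reports completion. The same record-and-poll pattern reduced to plain CUDA runtime calls (a sketch: a callback stands in for the redGrapes event, and events are created on the fly instead of recycled through EventPool):

    #include <cuda_runtime.h>

    #include <functional>
    #include <queue>
    #include <utility>

    std::queue<std::pair<cudaEvent_t, std::function<void()>>> pending;

    // called right after a task has submitted its asynchronous work to `stream`
    void record(cudaStream_t stream, std::function<void()> notify_post_event)
    {
        cudaEvent_t ev;
        cudaEventCreateWithFlags(&ev, cudaEventDisableTiming);
        cudaEventRecord(ev, stream); // completes once all prior work on `stream` is done
        pending.emplace(ev, std::move(notify_post_event));
    }

    // called periodically from the worker loop, like CudaWorker::poll above
    void poll()
    {
        if(!pending.empty() && cudaEventQuery(pending.front().first) == cudaSuccess)
        {
            pending.front().second(); // corresponds to event.notify()
            cudaEventDestroy(pending.front().first);
            pending.pop();
        }
    }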
diff --git a/redGrapes/dispatch/cuda/scheduler.hpp b/redGrapes/dispatch/cuda/scheduler.hpp
deleted file mode 100644
index b3031fa2..00000000
--- a/redGrapes/dispatch/cuda/scheduler.hpp
+++ /dev/null
@@ -1,194 +0,0 @@
-/* Copyright 2020-2024 Michael Sippel, Tapish Narwal
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- */
-
-#pragma once
-
-#include "redGrapes/dispatch/cuda/event_pool.hpp"
-#include "redGrapes/dispatch/cuda/task_properties.hpp"
-#include "redGrapes/scheduler/event.hpp"
-#include "redGrapes/scheduler/scheduler.hpp"
-#include "redGrapes/task/property/graph.hpp"
-
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-
-namespace redGrapes
-{
-    namespace dispatch
-    {
-        namespace cuda
-        {
-
-            thread_local cudaStream_t current_stream;
-
-            // this class is not thread safe
-            template<typename Task>
-            struct CudaStreamDispatcher
-            {
-                cudaStream_t cuda_stream;
-                std::recursive_mutex mutex;
-                std::queue<std::pair<cudaEvent_t, scheduler::EventPtr<Task>>> events;
-
-                CudaStreamDispatcher()
-                {
-                    cudaStreamCreate(&cuda_stream);
-                }
-
-                CudaStreamDispatcher(CudaStreamDispatcher const& other)
-                {
-                    spdlog::warn("CudaStreamDispatcher copy constructor called!");
-                }
-
-                ~CudaStreamDispatcher()
-                {
-                    cudaStreamDestroy(cuda_stream);
-                }
-
-                void poll()
-                {
-                    std::lock_guard lock(mutex);
-                    if(!events.empty())
-                    {
-                        auto& cuda_event = events.front().first;
-                        auto& event = events.front().second;
-
-                        if(cudaEventQuery(cuda_event) == cudaSuccess)
-                        {
-                            SPDLOG_TRACE("cuda event {} ready", cuda_event);
-                            EventPool::get().free(cuda_event);
-                            event.notify();
-
-                            events.pop();
-                        }
-                    }
-                }
-
-                void dispatch_task(Task& task)
-                {
-                    std::lock_guard lock(mutex);
-
-                    for(auto predecessor : task.in_edges)
-                    {
-                        SPDLOG_TRACE("cudaDispatcher: consider predecessor \"{}\"", predecessor->label);
-
-                        if(auto cuda_event = predecessor->cuda_event)
-                        {
-                            SPDLOG_TRACE(
-                                "cudaDispatcher: task {} \"{}\" wait for {}",
-                                task.task_id,
-                                task.label,
-                                *cuda_event);
-
-                            cudaStreamWaitEvent(cuda_stream, *cuda_event, 0);
-                        }
-                    }
-
-                    SPDLOG_TRACE("CudaScheduler: start {}", task_id);
-
-                    current_stream = cuda_stream;
-
-                    // run the code that calls the CUDA API and submits work to current_stream
-                    task->run();
-
-                    cudaEvent_t cuda_event = EventPool::get().alloc();
-                    cudaEventRecord(cuda_event, cuda_stream);
-                    task->cuda_event = cuda_event;
-
-                    task->get_pre_event().notify();
-
-                    SPDLOG_TRACE("CudaStreamDispatcher {}: recorded event {}", cuda_stream, cuda_event);
-                    events.push(std::make_pair(cuda_event, task->get_post_event()));
-                }
-            };
-
-            template<typename TTask>
-            struct CudaScheduler : redGrapes::scheduler::IScheduler<TTask>
-            {
-            private:
-                bool recording;
-                bool cuda_graph_enabled;
-
-                std::recursive_mutex mutex;
-                unsigned int current_stream;
-                std::vector<CudaStreamDispatcher<TTask>> streams;
-
-                std::function<bool(TTask const&)> is_cuda_task;
-
-            public:
-                CudaScheduler(
-                    std::function<bool(TTask const&)> is_cuda_task,
-                    size_t stream_count = 1,
-                    bool cuda_graph_enabled = false)
-                    : is_cuda_task(is_cuda_task)
-                    , current_stream(0)
-                    , cuda_graph_enabled(cuda_graph_enabled)
-                {
-                    // reserve to avoid copy constructor of CudaStreamDispatcher
-                    streams.reserve(stream_count);
-
-                    for(size_t i = 0; i < stream_count; ++i)
-                        streams.emplace_back();
-
-                    SPDLOG_TRACE("CudaScheduler: use {} streams", streams.size());
-                }
-
-                //! submits the call to the cuda runtime
-                void activate_task(TTask& task)
-                {
-                    unsigned int stream_id = current_stream;
-                    current_stream = (current_stream + 1) % streams.size();
-
-                    SPDLOG_TRACE("Dispatch Cuda task {} \"{}\" on stream {}", task.task_id, task.label, stream_id);
-                    streams[stream_id].dispatch_task(task);
-                }
-
-                //! checks if some cuda calls finished and notify the redGrapes manager
-                void poll()
-                {
-                    for(size_t stream_id = 0; stream_id < streams.size(); ++stream_id)
-                        streams[stream_id].poll();
-                }
-
-                /*! whats the task dependency type for the edge a -> b (task a precedes task b)
-                 * @return true if task b depends on the pre event of task a, false if task b depends on the post event
-                 *         of task b.
-                 */
-                bool task_dependency_type(TTask const& a, TTask const& b)
-                {
-                    assert(is_cuda_task(b));
-                    return is_cuda_task(a);
-                }
-            };
-
-        } // namespace cuda
-
-    } // namespace dispatch
-
-} // namespace redGrapes
-
-template<>
-struct fmt::formatter<redGrapes::dispatch::cuda::CudaTaskProperties>
-{
-    constexpr auto parse(format_parse_context& ctx)
-    {
-        return ctx.begin();
-    }
-
-    template<typename FormatContext>
-    auto format(redGrapes::dispatch::cuda::CudaTaskProperties const& prop, FormatContext& ctx)
-    {
-        if(auto e = prop.cuda_event)
-            return fmt::format_to(ctx.out(), "\"cuda_event\" : {}", *e);
-        else
-            return fmt::format_to(ctx.out(), "\"cuda_event\" : null");
-    }
-};
diff --git a/redGrapes/resource/access/io.hpp b/redGrapes/resource/access/io.hpp
index 8dc5ec4d..ea07dfba 100644
--- a/redGrapes/resource/access/io.hpp
+++ b/redGrapes/resource/access/io.hpp
@@ -1,4 +1,4 @@
-/* Copyright 2019 Michael Sippel
+/* Copyright 2019-2024 Michael Sippel, Tapish Narwal
  *
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -11,7 +11,6 @@
 
 #pragma once
 
-#include
 #include
 
 namespace redGrapes
diff --git a/redGrapes/scheduler/cuda_thread_scheduler.hpp b/redGrapes/scheduler/cuda_thread_scheduler.hpp
new file mode 100644
index 00000000..46bc552f
--- /dev/null
+++ b/redGrapes/scheduler/cuda_thread_scheduler.hpp
@@ -0,0 +1,73 @@
+/* Copyright 2024 Tapish Narwal
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include "redGrapes/TaskFreeCtx.hpp"
+#include "redGrapes/dispatch/cuda/cuda_worker.hpp"
+#include "redGrapes/scheduler/thread_scheduler.hpp"
+
+#include
+
+namespace redGrapes
+{
+    namespace scheduler
+    {
+        template<typename TTask>
+        class CudaThreadScheduler : public ThreadScheduler<dispatch::cuda::CudaWorker<TTask>>
+        {
+        private:
+            unsigned num_streams;
+
+        public:
+            CudaThreadScheduler(unsigned num_streams = 1) : num_streams{num_streams}
+            {
+            }
+
+            void init(WorkerId base_id) override
+            {
+                this->m_base_id = base_id;
+                // TODO check if it was already initialized
+                if(!this->m_worker_thread)
+                {
+                    unsigned pu_id = base_id % TaskFreeCtx::n_pus;
+                    // create a new arena for this worker, bound to the processing unit it runs on
+                    hwloc_obj_t obj = hwloc_get_obj_by_type(TaskFreeCtx::hwloc_ctx.topology, HWLOC_OBJ_PU, pu_id);
+                    TaskFreeCtx::worker_alloc_pool->allocs.emplace_back(
+                        memory::HwlocAlloc(TaskFreeCtx::hwloc_ctx, obj),
+                        REDGRAPES_ALLOC_CHUNKSIZE);
+
+                    this->m_worker_thread
+                        = memory::alloc_shared_bind<dispatch::thread::WorkerThread<dispatch::cuda::CudaWorker<TTask>>>(
+                            this->m_base_id,
+                            obj,
+                            this->m_base_id,
+                            num_streams);
+                }
+            }
+
+            /*! what's the task dependency type for the edge a -> b (task a precedes task b)
+             * @return true if task b depends on the pre event of task a, false if task b depends on the post event
+             *         of task a.
+             */
+ */ + bool task_dependency_type(TTask const& a, TTask const& b) + { + if(a.m_cuda_stream_idx) + return true; + else + return false; + ; + } + + cudaStream_t getCudaStream(unsigned idx) const + { + assert(idx < num_streams); + return this->m_worker_thread->worker->streams[idx].cuda_stream; + } + }; + } // namespace scheduler +} // namespace redGrapes diff --git a/redGrapes/scheduler/pool_scheduler.tpp b/redGrapes/scheduler/pool_scheduler.tpp index ad20c4b6..fb89fd8e 100644 --- a/redGrapes/scheduler/pool_scheduler.tpp +++ b/redGrapes/scheduler/pool_scheduler.tpp @@ -116,11 +116,12 @@ namespace redGrapes template bool PoolScheduler::wake(WakerId id) { - if(id == 0) + auto local_waker_id = id - m_base_id; + if(local_waker_id == 0) return cv.notify(); // TODO analyse and optimize - else if(id > 0 && id - m_base_id <= n_workers) - return m_worker_pool->get_worker_thread(id - m_base_id - 1).worker->wake(); + else if(local_waker_id > 0 && local_waker_id <= n_workers) + return m_worker_pool->get_worker_thread(local_waker_id - 1).worker->wake(); else return false; } diff --git a/redGrapesConfig.cmake b/redGrapesConfig.cmake index d79be696..56eed262 100644 --- a/redGrapesConfig.cmake +++ b/redGrapesConfig.cmake @@ -53,7 +53,7 @@ if( NOT TARGET redGrapes ) if(MSVC) target_compile_options(redGrapes INTERFACE /W4 /WX) else() - target_compile_options(redGrapes INTERFACE -Wall -Wextra -Wpedantic) + target_compile_options(redGrapes INTERFACE -Wall -Wextra) endif() endif()