Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement multiplexing strategy #638

Draft
wants to merge 16 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 18 additions & 12 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ endforeach()

# =============================================================================
# CHIP-SPV CMAKE DEPENDENCIES
if(NOT DEFINED OpenCL_LIBRARY)
option(CHIP_ENABLE_OPENCL "Enable building the OpenCL backend" ON)
option(CHIP_ENABLE_LEVEL0 "Enable building the Level Zero backend" ON)
if(NOT DEFINED OpenCL_LIBRARY AND CHIP_ENABLE_OPENCL)
message(STATUS "OpenCL_LIBRARY was not set. Searching for libOpenCL.so in LD_LIBRARY_PATH")
find_library(OpenCL_LIBRARY NAMES OpenCL PATHS ENV LD_LIBRARY_PATH ./ NO_CACHE)
if(OpenCL_LIBRARY)
Expand All @@ -76,7 +78,7 @@ if(NOT DEFINED OpenCL_LIBRARY)
endif()
endif()

if(NOT DEFINED LevelZero_LIBRARY)
if(NOT DEFINED LevelZero_LIBRARY AND CHIP_ENABLE_LEVEL0)
message(STATUS "LevelZero_LIBRARY was not set. Searching for ze_loader.so in LD_LIBRARY_PATH")
find_library(LevelZero_LIBRARY NAMES ze_loader PATHS ENV LD_LIBRARY_PATH ./ NO_CACHE)
if(LevelZero_LIBRARY)
Expand All @@ -87,10 +89,14 @@ if(NOT DEFINED LevelZero_LIBRARY)
endif()
endif()

message(STATUS "OpenCL_LIBRARY: ${OpenCL_LIBRARY}")
# Report the OpenCL library path only when the OpenCL backend option is ON.
if(CHIP_ENABLE_OPENCL)
message(STATUS "OpenCL_LIBRARY: ${OpenCL_LIBRARY}")
endif()
# Report the Level Zero library path only when the Level Zero backend option is ON.
if(CHIP_ENABLE_LEVEL0)
message(STATUS "LevelZero_LIBRARY: ${LevelZero_LIBRARY}")
endif()

if(NOT OpenCL_LIBRARY AND NOT LevelZero_LIBRARY)
if(NOT (OpenCL_LIBRARY AND CHIP_ENABLE_OPENCL) AND NOT (LevelZero_LIBRARY AND CHIP_ENABLE_LEVEL0))
message(FATAL_ERROR "At least one of OpenCL,Level0 libraries must be available")
endif()

Expand Down Expand Up @@ -128,13 +134,13 @@ set(CHIP_SRC
src/SPIRVFuncInfo.cc
)

if(OpenCL_LIBRARY)
if(OpenCL_LIBRARY AND CHIP_ENABLE_OPENCL)
list(APPEND CHIP_SRC
src/backend/OpenCL/CHIPBackendOpenCL.cc
src/backend/OpenCL/SVMemoryRegion.cc)
endif()

if(LevelZero_LIBRARY)
if(LevelZero_LIBRARY AND CHIP_ENABLE_LEVEL0)
list(APPEND CHIP_SRC
src/backend/Level0/CHIPBackendLevel0.cc)
endif()
Expand Down Expand Up @@ -295,12 +301,12 @@ endif()

set(CHIP_INTERFACE_LIBS ${PTHREAD_LIBRARY})

if(OpenCL_LIBRARY)
if(OpenCL_LIBRARY AND CHIP_ENABLE_OPENCL)
list(APPEND CHIP_SPV_DEFINITIONS HAVE_OPENCL)
list(PREPEND CHIP_INTERFACE_LIBS ${OpenCL_LIBRARY})
endif()

if(LevelZero_LIBRARY)
if(LevelZero_LIBRARY AND CHIP_ENABLE_LEVEL0)
list(APPEND CHIP_SPV_DEFINITIONS HAVE_LEVEL0)
list(PREPEND CHIP_INTERFACE_LIBS ${LevelZero_LIBRARY})
endif()
Expand Down Expand Up @@ -422,13 +428,13 @@ set(HIP_OFFLOAD_COMPILE_OPTIONS_BUILD_
list(APPEND HIP_OFFLOAD_LINK_OPTIONS_INSTALL_ "-L${LIB_INSTALL_DIR}" "-lCHIP")
list(APPEND HIP_OFFLOAD_LINK_OPTIONS_BUILD_ "-L${CMAKE_BINARY_DIR}" "-lCHIP")

if(OpenCL_LIBRARY)
if(OpenCL_LIBRARY AND CHIP_ENABLE_OPENCL)
target_link_options(CHIP PUBLIC -Wl,-rpath,${OpenCL_DIR})
target_link_directories(CHIP PUBLIC ${OpenCL_DIR})
target_link_libraries(CHIP PUBLIC OpenCL)
endif()

if(LevelZero_LIBRARY)
if(LevelZero_LIBRARY AND CHIP_ENABLE_LEVEL0)
target_link_options(CHIP PUBLIC -Wl,-rpath,${LevelZero_DIR})
target_link_directories(CHIP PUBLIC ${LevelZero_DIR})
target_link_libraries(CHIP PUBLIC ze_loader)
Expand Down Expand Up @@ -673,10 +679,10 @@ endif()

# Short Summary
# Print whether Level Zero and/or OpenCL are enabled
if(OpenCL_LIBRARY)
if(OpenCL_LIBRARY AND CHIP_ENABLE_OPENCL)
message(STATUS "OpenCL is enabled: ${OpenCL_LIBRARY}")
endif()

if(LevelZero_LIBRARY)
if(LevelZero_LIBRARY AND CHIP_ENABLE_LEVEL0)
message(STATUS "Level Zero is enabled: ${LevelZero_LIBRARY}")
endif()
2 changes: 1 addition & 1 deletion HIP
2 changes: 1 addition & 1 deletion HIPCC
Submodule HIPCC updated 1 files
+23 −26 src/hipBin_spirv.h
2 changes: 1 addition & 1 deletion bitcode/ROCm-Device-Libs
Submodule ROCm-Device-Libs updated 1 files
+0 −0 t
40 changes: 29 additions & 11 deletions src/CHIPBackend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ chipstar::CallbackData::CallbackData(hipStreamCallback_t TheCallbackF,
CallbackF(TheCallbackF) {}

void chipstar::CallbackData::execute(hipError_t ResultFromDependency) {
CallbackF(ChipQueue, ResultFromDependency, CallbackArgs);
CallbackF(STREAM(ChipQueue), ResultFromDependency, CallbackArgs);
}

// DeviceVar
Expand Down Expand Up @@ -480,9 +480,9 @@ void chipstar::ExecItem::copyArgs(void **Args) {
}

chipstar::ExecItem::ExecItem(dim3 GridDim, dim3 BlockDim, size_t SharedMem,
hipStream_t ChipQueue)
chipstar::Queue *ChipQueue)
: SharedMem_(SharedMem), GridDim_(GridDim), BlockDim_(BlockDim),
ChipQueue_(static_cast<chipstar::Queue *>(ChipQueue)){};
ChipQueue_(ChipQueue){};

/// Returns the block dimensions (BlockDim_) this ExecItem was constructed with.
dim3 chipstar::ExecItem::getBlock() { return BlockDim_; }
/// Returns the grid dimensions (GridDim_) this ExecItem was constructed with.
dim3 chipstar::ExecItem::getGrid() { return GridDim_; }
Expand Down Expand Up @@ -540,11 +540,18 @@ chipstar::Queue *chipstar::Device::getPerThreadDefaultQueue() {
return getPerThreadDefaultQueueNoLock();
}

/// Deleter functor body for the per-thread default queue: releases the queue
/// with a plain `delete` expression.
///
/// @param q queue to destroy; may be null, in which case nothing happens.
void chipstar::Device::QueueDeleter::operator()(
    chipstar::Queue *q) const noexcept {
  // A delete-expression on a null pointer is a no-op, so no explicit guard
  // is required here.
  delete q;
}

chipstar::Queue *chipstar::Device::getPerThreadDefaultQueueNoLock() {
if (!PerThreadDefaultQueue.get()) {
logDebug("PerThreadDefaultQueue is null.. Creating a new queue.");
PerThreadDefaultQueue =
std::unique_ptr<chipstar::Queue>(::Backend->createCHIPQueue(this));
std::unique_ptr<chipstar::Queue, chipstar::Device::QueueDeleter>(
::Backend->createCHIPQueue(this), chipstar::Device::QueueDeleter());
PerThreadStreamUsed_ = true;
PerThreadDefaultQueue.get()->PerThreadQueueForDevice = this;
}
Expand Down Expand Up @@ -799,7 +806,8 @@ void chipstar::Device::eraseModule(chipstar::Module *Module) {
LOCK(DeviceMtx); // SrcModToCompiledMod_
for (auto &Kv : SrcModToCompiledMod_)
if (Kv.second == Module) {
delete Module;
Module->~Module();
free(CHIP_OBJ_TO_HANDLE(Module, ihipModule_t));
SrcModToCompiledMod_.erase(Kv.first);
break;
}
Expand Down Expand Up @@ -1228,7 +1236,8 @@ chipstar::Backend::~Backend() {
UserEvents.clear();
for (auto &Ctx : ChipContexts) {
::Backend->removeContext(Ctx);
delete Ctx;
Ctx->~Context();
free(CHIP_OBJ_TO_HANDLE(Ctx, ihipCtx_t));
}
}

Expand Down Expand Up @@ -1364,7 +1373,7 @@ void chipstar::Backend::addContext(chipstar::Context *ChipContext) {

hipError_t chipstar::Backend::configureCall(dim3 Grid, dim3 Block,
size_t SharedMem,
hipStream_t ChipQueue) {
chipstar::Queue *ChipQueue) {
logDebug("Backend->configureCall(grid=({},{},{}), block=({},{},{}), "
"shared={}, q={}",
Grid.x, Grid.y, Grid.z, Block.x, Block.y, Block.z, SharedMem,
Expand Down Expand Up @@ -1483,9 +1492,9 @@ chipstar::Queue *chipstar::Backend::findQueue(chipstar::Queue *ChipQueue) {
auto Dev = ::Backend->getActiveDevice();
LOCK(Dev->DeviceMtx); // chipstar::Device::ChipQueues_ via getQueuesNoLock()

if (ChipQueue == hipStreamPerThread) {
if (ChipQueue == (chipstar::Queue *)hipStreamPerThread) {
return Dev->getPerThreadDefaultQueueNoLock();
} else if (ChipQueue == hipStreamLegacy) {
} else if (ChipQueue == (chipstar::Queue *)hipStreamLegacy) {
return Dev->getLegacyDefaultQueue();
} else if (ChipQueue == nullptr) {
return Dev->getDefaultQueue();
Expand Down Expand Up @@ -1688,7 +1697,12 @@ void chipstar::Queue::updateLastNode(CHIPGraphNode *NewNode) {
LastNode_ = NewNode;
}

void chipstar::Queue::initCaptureGraph() { CaptureGraph_ = new CHIPGraph(); }
void chipstar::Queue::initCaptureGraph() {
void *mem = malloc(sizeof(ihipDispatch) + sizeof(CHIPGraph));
CHIPGraph *G = CHIP_HANDLE_TO_OBJ(mem, CHIPGraph);
G = new (G) CHIPGraph();
CaptureGraph_ = G;
}

std::shared_ptr<chipstar::Event>
chipstar::Queue::RegisteredVarCopy(chipstar::ExecItem *ExecItem,
Expand Down Expand Up @@ -1883,4 +1897,8 @@ void chipstar::Queue::addCallback(hipStreamCallback_t Callback,

CHIPGraph *chipstar::Queue::getCaptureGraph() const {
return static_cast<CHIPGraph *>(CaptureGraph_);
}
}

/// Converts an internal queue pointer into the hipStream_t value handed to
/// user code, by reinterpreting the queue's dispatchable-object handle.
///
/// @param Queue queue to convert; it is dereferenced unconditionally, so it
///              must not be null.
/// @return the result of Queue->asDispatchableObject() viewed as hipStream_t.
hipStream_t STREAM(chipstar::Queue *Queue) noexcept {
  auto Dispatchable = Queue->asDispatchableObject();
  return reinterpret_cast<hipStream_t>(Dispatchable);
}
28 changes: 18 additions & 10 deletions src/CHIPBackend.hh
Original file line number Diff line number Diff line change
Expand Up @@ -626,7 +626,7 @@ public:
/// Sets the HasInitializer_ flag to State (defaults to true).
void markHasInitializer(bool State = true) { HasInitializer_ = State; }
};

class Event : public ihipEvent_t {
class Event {
protected:
bool TrackCalled_ = false;
bool UserEvent_ = false;
Expand Down Expand Up @@ -654,6 +654,10 @@ protected:
/// Virtual destructor with an empty body, so derived event types are
/// destroyed correctly through a chipstar::Event pointer.
virtual ~Event(){};

public:
/// Destroys an event and releases its storage.
///
/// Runs the (virtual) destructor explicitly and then passes the pointer
/// produced by CHIP_OBJ_TO_HANDLE(e, ihipEvent_t) to free().
/// NOTE(review): this presumes the event was placement-constructed inside a
/// malloc'd block whose start CHIP_OBJ_TO_HANDLE recovers — confirm against
/// the event creation path.
///
/// @param e event to destroy; dereferenced unconditionally, must not be null.
static void deleter(chipstar::Event *e) {
e->~Event();
free(CHIP_OBJ_TO_HANDLE(e, ihipEvent_t));
}
/// Sets TrackCalled_ to true.
void markTracked() { TrackCalled_ = true; }
/// Returns the current value of TrackCalled_.
bool isTrackCalled() { return TrackCalled_; }
/// Sets TrackCalled_ to Val.
void setTrackCalled(bool Val) { TrackCalled_ = Val; }
Expand Down Expand Up @@ -846,7 +850,7 @@ public:
* ROCclr - amd::Program
* CUDA - CUmodule
*/
class Module : public ihipModule_t {
class Module {
/// Flag for the allocation state of the device variables. True if
/// all variables have space allocated for this module for the
/// device this module is attached to. False implies that
Expand Down Expand Up @@ -1006,7 +1010,7 @@ public:
/**
* @brief Contains information about the function on the host and device
*/
class Kernel : public ihipModuleSymbol_t {
class Kernel {
protected:
/**
* @brief hidden default constructor. Only derived type constructor should be
Expand Down Expand Up @@ -1180,7 +1184,7 @@ public:
* @param chip_queue_
*/
ExecItem(dim3 GirdDim, dim3 BlockDim, size_t SharedMem,
hipStream_t ChipQueue);
chipstar::Queue *ChipQueue);

/**
* @brief Set the chipstar::Kernel object
Expand Down Expand Up @@ -1290,7 +1294,11 @@ public:
/// Returns a copy of ChipQueues_ without acquiring any lock itself.
/// NOTE(review): presumably the caller must already hold DeviceMtx — confirm.
std::vector<chipstar::Queue *> getQueuesNoLock() { return ChipQueues_; }

chipstar::Queue *LegacyDefaultQueue;
inline static thread_local std::unique_ptr<chipstar::Queue>
/// Custom deleter type for std::unique_ptr<chipstar::Queue, QueueDeleter>.
/// Only declared here; operator() is defined out of line.
struct QueueDeleter {
/// Releases the queue pointed to by q.
void operator()(chipstar::Queue *q) const noexcept;
};

inline static thread_local std::unique_ptr<chipstar::Queue, QueueDeleter>
PerThreadDefaultQueue;

/**
Expand Down Expand Up @@ -1611,7 +1619,7 @@ protected:
* multiple devices. Provides for creation of additional queues, events, and
* interaction with devices.
*/
class Context : public ihipCtx_t {
class Context {
protected:
int RefCount_;
chipstar::Device *ChipDevice_;
Expand Down Expand Up @@ -1802,7 +1810,7 @@ public:

virtual chipstar::ExecItem *createExecItem(dim3 GirdDim, dim3 BlockDim,
size_t SharedMem,
hipStream_t ChipQueue) = 0;
chipstar::Queue *ChipQueue) = 0;

int getPerThreadQueuesActive();
std::mutex SetActiveMtx;
Expand Down Expand Up @@ -1956,7 +1964,7 @@ public:
* @return hipError_t
*/
hipError_t configureCall(dim3 GridDim, dim3 BlockDim, size_t SharedMem,
hipStream_t ChipQueue);
chipstar::Queue *ChipQueue);

/**
* @brief Return a device which meets or exceeds the requirements
Expand Down Expand Up @@ -2029,11 +2037,11 @@ public:
/**
* @brief Queue class for submitting kernels to for execution
*/
class Queue : public ihipStream_t {
class Queue : public Dispatchable {
protected:
hipStreamCaptureStatus CaptureStatus_ = hipStreamCaptureStatusNone;
hipStreamCaptureMode CaptureMode_ = hipStreamCaptureModeGlobal;
hipGraph_t CaptureGraph_;
CHIPGraph *CaptureGraph_;
std::mutex LastEventMtx;
/// @brief node for creating a dependency chain between subsequent record
/// events when in graph capture mode
Expand Down
Loading
Loading