Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
minmingzhu committed Sep 5, 2024
1 parent ce24380 commit caafd27
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 12 deletions.
14 changes: 3 additions & 11 deletions mllib-dal/src/main/native/GPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ sycl::queue getQueue(const ComputeDevice device) {

preview::spmd::communicator<preview::spmd::device_memory_access::usm>
createDalCommunicator(const jint executorNum, const jint rank,
const ccl::string ccl_ip_port) {
const ccl::string ccl_ip_port, std::string breakdown_name) {
auto gpus = get_gpus();

auto t1 = std::chrono::high_resolution_clock::now();
Expand All @@ -127,23 +127,14 @@ createDalCommunicator(const jint executorNum, const jint rank,
(float)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
.count();

logger::println(logger::INFO, "OneCCL singleton init took %f secs",
duration / 1000);

t1 = std::chrono::high_resolution_clock::now();
logger::Logger::getInstance(breakdown_name).printLogToFile("rankID was %d, OneCCL singleton init took %f secs.", rank, duration / 1000 );

auto kvs_attr = ccl::create_kvs_attr();

kvs_attr.set<ccl::kvs_attr_id::ip_port>(ccl_ip_port);

ccl::shared_ptr_class<ccl::kvs> kvs = ccl::create_main_kvs(kvs_attr);

t2 = std::chrono::high_resolution_clock::now();
duration =
(float)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
.count();
logger::println(logger::INFO, "OneCCL (native): create kvs took %f secs",
duration / 1000);
sycl::queue queue{gpus[0]};
t1 = std::chrono::high_resolution_clock::now();
auto comm = preview::spmd::make_communicator<preview::spmd::backend::ccl>(
Expand All @@ -152,5 +143,6 @@ createDalCommunicator(const jint executorNum, const jint rank,
duration =
(float)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
.count();
logger::Logger::getInstance(breakdown_name).printLogToFile("rankID was %d, create communicator took %f secs.", rank, duration / 1000 );
return comm;
}
2 changes: 1 addition & 1 deletion mllib-dal/src/main/native/GPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ sycl::queue getAssignedGPU(const ComputeDevice device, jint *gpu_indices);

sycl::queue getQueue(const ComputeDevice device);
preview::spmd::communicator<preview::spmd::device_memory_access::usm>
createDalCommunicator(jint executorNum, jint rank, ccl::string ccl_ip_port);
createDalCommunicator(jint executorNum, jint rank, ccl::string ccl_ip_port, std::string breakdown_name);

0 comments on commit caafd27

Please sign in to comment.