Merge branch 'chao/xccl2' into chao/xccl3

intel · Dec 31, 2024 · eb7d869 · eb7d869
2 parents 342517a + 106adb5
commit eb7d869
Showing 1 changed file with 12 additions and 4 deletions.
diff --git a/src/xccl/ProcessGroupXCCL.cpp b/src/xccl/ProcessGroupXCCL.cpp
@@ -69,7 +69,7 @@ bool computeLengthsAndCheckAndGetFlat(
   return isFlat;
 }
 
-bool check_same_size(const std::vector<at::Tensor>& input_tensors) {
+bool checkSameSize(const std::vector<at::Tensor>& input_tensors) {
   for (const auto& input_tensor : input_tensors) {
     if (!input_tensors[0].is_same_size(input_tensor)) {
       return false;
@@ -147,7 +147,8 @@ ccl::reduction getXcclReduceOp(const ReduceOp& reduceOp, at::Tensor& input) {
       // Map sum to max for bool tensors to avoid overflow issues with sum.
       return ccl::reduction::max;
     }
-    // Use SUM emu AVG due to oneCCL not support AVG
+    // Emulate AVG with SUM because oneCCL does not support AVG yet.
+    // oneCCL is expected to support AVG in the basekit 2025.2 release.
     if (reduceOp == ReduceOp::AVG) {
       return ccl::reduction::sum;
     }
@@ -898,6 +899,7 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::allreduce_impl(
             comm,
             ccl::create_stream(stream.queue()));
         // Use SUM emu AVG due to oneCCL not support AVG
+        // oneCCL is expected to support avg in basekit 2025.2 release.
         if (opts.reduceOp == ReduceOp::AVG) {
           auto divisor = getSize();
           output.div_(divisor);
@@ -951,6 +953,7 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::allreduce(
             comm,
             ccl::create_stream(stream.queue()));
         // Use SUM emu AVG due to oneCCL not support AVG
+        // oneCCL is expected to support avg in basekit 2025.2 release.
         if (opts.reduceOp == ReduceOp::AVG) {
           auto divisor = getSize();
           output.div_(divisor);
@@ -1002,6 +1005,7 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::allreduce_coalesced(
             comm,
             ccl::create_stream(stream.queue()));
         // Use SUM emu AVG due to oneCCL not support AVG
+        // oneCCL is expected to support avg in basekit 2025.2 release.
         if (opts.reduceOp == ReduceOp::AVG) {
           auto divisor = getSize();
           output.div_(divisor);
@@ -1174,6 +1178,7 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::_reduce_oop(
             comm,
             ccl::create_stream(stream.queue()));
         // Use SUM emu AVG due to oneCCL not support AVG
+        // oneCCL is expected to support avg in basekit 2025.2 release.
         if (opts.reduceOp == ReduceOp::AVG && getRank() == root) {
           auto divisor = getSize();
           output.div_(divisor);
@@ -1213,7 +1218,7 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::allgather(
       -1, // globalRankStride
       this->getSize()); // worldSize
 
-  bool same_size = check_same_size(outputTensors_);
+  bool same_size = checkSameSize(outputTensors_);
   if (same_size) {
     // Flatten a vector of tensors into a single, stacked tensor.
     at::Tensor outputFlattened = newLikeFlat(outputTensors_);
@@ -1375,7 +1380,7 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::reduce_scatter(
       -1, // globalRankStride
       this->getSize()); // worldSize
 
-  bool same_size = check_same_size(inputTensors_);
+  bool same_size = checkSameSize(inputTensors_);
   if (same_size) {
     // Flatten a vector of tensors into a single, stacked tensor.
     at::Tensor inputFlattened = newLikeFlat(inputTensors_);
@@ -1399,6 +1404,7 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::reduce_scatter(
               comm,
               ccl::create_stream(stream.queue()));
           // Use SUM emu AVG due to oneCCL not support AVG
+          // oneCCL is expected to support avg in basekit 2025.2 release.
           if (opts.reduceOp == ReduceOp::AVG) {
             auto divisor = getSize();
             output.div_(divisor);
@@ -1487,6 +1493,7 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::_reduce_scatter_base(
             comm,
             ccl::create_stream(stream.queue()));
         // Use SUM emu AVG due to oneCCL not support AVG
+        // oneCCL is expected to support avg in basekit 2025.2 release.
         if (opts.reduceOp == ReduceOp::AVG) {
           auto divisor = getSize();
           output.div_(divisor);
@@ -1521,6 +1528,7 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::reduce_scatter_tensor_coalesced(
             comm,
             ccl::create_stream(stream.queue()));
         // Use SUM emu AVG due to oneCCL not support AVG
+        // oneCCL is expected to support avg in basekit 2025.2 release.
         if (opts.reduceOp == ReduceOp::AVG) {
           auto divisor = getSize();
           output.div_(divisor);