Skip to content

Commit

Permalink
Merge branch 'chao/xccl2' into chao/xccl3
Browse files Browse the repository at this point in the history
  • Loading branch information
Chao1Han committed Dec 31, 2024
2 parents 342517a + 106adb5 commit eb7d869
Showing 1 changed file with 12 additions and 4 deletions.
16 changes: 12 additions & 4 deletions src/xccl/ProcessGroupXCCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ bool computeLengthsAndCheckAndGetFlat(
return isFlat;
}

bool check_same_size(const std::vector<at::Tensor>& input_tensors) {
bool checkSameSize(const std::vector<at::Tensor>& input_tensors) {
for (const auto& input_tensor : input_tensors) {
if (!input_tensors[0].is_same_size(input_tensor)) {
return false;
Expand Down Expand Up @@ -147,7 +147,8 @@ ccl::reduction getXcclReduceOp(const ReduceOp& reduceOp, at::Tensor& input) {
// Map sum to max for bool tensors to avoid overflow issues with sum.
return ccl::reduction::max;
}
// Use SUM emu AVG due to oneCCL not support AVG
// Map AVG to SUM because oneCCL does not support AVG yet; callers emulate
// the average by dividing the summed result afterwards.
// oneCCL is expected to support AVG in the basekit 2025.2 release.
if (reduceOp == ReduceOp::AVG) {
return ccl::reduction::sum;
}
Expand Down Expand Up @@ -898,6 +899,7 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::allreduce_impl(
comm,
ccl::create_stream(stream.queue()));
// Emulate AVG with SUM followed by a division by the group size, because
// oneCCL does not support AVG yet.
// oneCCL is expected to support AVG in the basekit 2025.2 release.
if (opts.reduceOp == ReduceOp::AVG) {
auto divisor = getSize();
output.div_(divisor);
Expand Down Expand Up @@ -951,6 +953,7 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::allreduce(
comm,
ccl::create_stream(stream.queue()));
// Emulate AVG with SUM followed by a division by the group size, because
// oneCCL does not support AVG yet.
// oneCCL is expected to support AVG in the basekit 2025.2 release.
if (opts.reduceOp == ReduceOp::AVG) {
auto divisor = getSize();
output.div_(divisor);
Expand Down Expand Up @@ -1002,6 +1005,7 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::allreduce_coalesced(
comm,
ccl::create_stream(stream.queue()));
// Emulate AVG with SUM followed by a division by the group size, because
// oneCCL does not support AVG yet.
// oneCCL is expected to support AVG in the basekit 2025.2 release.
if (opts.reduceOp == ReduceOp::AVG) {
auto divisor = getSize();
output.div_(divisor);
Expand Down Expand Up @@ -1174,6 +1178,7 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::_reduce_oop(
comm,
ccl::create_stream(stream.queue()));
// Emulate AVG with SUM followed by a division by the group size on the
// root rank, because oneCCL does not support AVG yet.
// oneCCL is expected to support AVG in the basekit 2025.2 release.
if (opts.reduceOp == ReduceOp::AVG && getRank() == root) {
auto divisor = getSize();
output.div_(divisor);
Expand Down Expand Up @@ -1213,7 +1218,7 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::allgather(
-1, // globalRankStride
this->getSize()); // worldSize

bool same_size = check_same_size(outputTensors_);
bool same_size = checkSameSize(outputTensors_);
if (same_size) {
// Flatten a vector of tensors into a single, stacked tensor.
at::Tensor outputFlattened = newLikeFlat(outputTensors_);
Expand Down Expand Up @@ -1375,7 +1380,7 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::reduce_scatter(
-1, // globalRankStride
this->getSize()); // worldSize

bool same_size = check_same_size(inputTensors_);
bool same_size = checkSameSize(inputTensors_);
if (same_size) {
// Flatten a vector of tensors into a single, stacked tensor.
at::Tensor inputFlattened = newLikeFlat(inputTensors_);
Expand All @@ -1399,6 +1404,7 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::reduce_scatter(
comm,
ccl::create_stream(stream.queue()));
// Emulate AVG with SUM followed by a division by the group size, because
// oneCCL does not support AVG yet.
// oneCCL is expected to support AVG in the basekit 2025.2 release.
if (opts.reduceOp == ReduceOp::AVG) {
auto divisor = getSize();
output.div_(divisor);
Expand Down Expand Up @@ -1487,6 +1493,7 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::_reduce_scatter_base(
comm,
ccl::create_stream(stream.queue()));
// Emulate AVG with SUM followed by a division by the group size, because
// oneCCL does not support AVG yet.
// oneCCL is expected to support AVG in the basekit 2025.2 release.
if (opts.reduceOp == ReduceOp::AVG) {
auto divisor = getSize();
output.div_(divisor);
Expand Down Expand Up @@ -1521,6 +1528,7 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::reduce_scatter_tensor_coalesced(
comm,
ccl::create_stream(stream.queue()));
// Emulate AVG with SUM followed by a division by the group size, because
// oneCCL does not support AVG yet.
// oneCCL is expected to support AVG in the basekit 2025.2 release.
if (opts.reduceOp == ReduceOp::AVG) {
auto divisor = getSize();
output.div_(divisor);
Expand Down

0 comments on commit eb7d869

Please sign in to comment.