Skip to content

Commit

Permalink
AMDGPU: Improve cost handling of canonicalize (llvm#101479)
Browse files (browse the repository at this point in the history)
  • Loading branch information
arsenm committed Aug 1, 2024
1 parent 5ad15e5 commit e7630a0
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 164 deletions.
11 changes: 10 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) {
case Intrinsic::fma:
case Intrinsic::fmuladd:
case Intrinsic::copysign:
case Intrinsic::canonicalize:
// There's a small benefit to using vector ops in the legalized code.
case Intrinsic::round:
case Intrinsic::uadd_sat:
Expand Down Expand Up @@ -742,15 +743,23 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
break;
case Intrinsic::copysign:
return NElts * getFullRateInstrCost();
case Intrinsic::canonicalize: {
InstRate =
SLT == MVT::f64 ? get64BitInstrCost(CostKind) : getFullRateInstrCost();
break;
}
case Intrinsic::uadd_sat:
case Intrinsic::usub_sat:
case Intrinsic::sadd_sat:
case Intrinsic::ssub_sat:
case Intrinsic::ssub_sat: {
static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16};
if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
NElts = 1;
break;
}
default:
break;
}

return LT.first * NElts * InstRate;
}
Expand Down
Loading

0 comments on commit e7630a0

Please sign in to comment.