Skip to content

Commit

Permalink
merge main into amd-staging
Browse files Browse the repository at this point in the history
Change-Id: I92e22feca66c12ccbde2e67e0a143473926c1e72
  • Loading branch information
Jenkins committed Sep 2, 2024
2 parents cb02a4d + 837ee5b commit 36ec6e9
Show file tree
Hide file tree
Showing 14 changed files with 2,115 additions and 1,700 deletions.
1 change: 1 addition & 0 deletions lld/ELF/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,7 @@ def: J<"plugin-opt=sample-profile=">,
def: F<"plugin-opt=save-temps">, Alias<save_temps>, HelpText<"Alias for --save-temps">;
def plugin_opt_stats_file: J<"plugin-opt=stats-file=">,
HelpText<"Filename to write LTO statistics to">;
def: J<"plugin-opt=time-trace=">, Alias<time_trace_eq>, HelpText<"Alias for --time-trace=">;
def: F<"plugin-opt=thinlto-emit-imports-files">,
Alias<thinlto_emit_imports_files>,
HelpText<"Alias for --thinlto-emit-imports-files">;
Expand Down
19 changes: 19 additions & 0 deletions lld/test/ELF/lto/time-trace.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
; REQUIRES: x86
; RUN: llvm-as %s -o %t.o
; RUN: ld.lld -m elf_x86_64 -shared %t.o -o %t.so --plugin-opt=time-trace=%t.trace.json
; RUN: FileCheck --input-file=%t.trace.json %s
;; Print to stdout
; RUN: ld.lld -m elf_x86_64 -shared %t.o -o %t.so --plugin-opt=time-trace=- | \
; RUN: FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Make sure the content is correct
; CHECK: "traceEvents"
; Make sure LTO events are recorded
; CHECK-SAME: "name":"LTO"

define void @foo() {
ret void
}
31 changes: 27 additions & 4 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -473,9 +473,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
} else {
setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
MVT::f16, Legal);
for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
ISD::STRICT_LROUND, ISD::STRICT_LLROUND,
ISD::STRICT_LRINT, ISD::STRICT_LLRINT})
setOperationAction(Op, MVT::f16, Custom);
setOperationAction(ISD::FABS, MVT::f16, Custom);
setOperationAction(ISD::FNEG, MVT::f16, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
Expand Down Expand Up @@ -6781,7 +6782,29 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
case ISD::LRINT:
case ISD::LLRINT:
return lowerVectorXRINT(Op, DAG, Subtarget);
if (Op.getValueType().isVector())
return lowerVectorXRINT(Op, DAG, Subtarget);
[[fallthrough]];
case ISD::LROUND:
case ISD::LLROUND: {
assert(Op.getOperand(0).getValueType() == MVT::f16 &&
"Unexpected custom legalisation");
SDLoc DL(Op);
SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
}
case ISD::STRICT_LRINT:
case ISD::STRICT_LLRINT:
case ISD::STRICT_LROUND:
case ISD::STRICT_LLROUND: {
assert(Op.getOperand(1).getValueType() == MVT::f16 &&
"Unexpected custom legalisation");
SDLoc DL(Op);
SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
{Op.getOperand(0), Op.getOperand(1)});
return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
{Ext.getValue(1), Ext.getValue(0)});
}
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_SMAX:
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/RISCV/RISCVInstrInfoF.td
Original file line number Diff line number Diff line change
Expand Up @@ -799,8 +799,8 @@ def : Pat<(i64 (any_lrint FPR32INX:$rs1)), (FCVT_L_S_INX $rs1, FRM_DYN)>;
def : Pat<(i64 (any_llrint FPR32INX:$rs1)), (FCVT_L_S_INX $rs1, FRM_DYN)>;

// float->int64 rounded to nearest with ties rounded away from zero.
def : Pat<(i64 (any_lround FPR32INX:$rs1)), (FCVT_L_S_INX $rs1, FRM_DYN)>;
def : Pat<(i64 (any_llround FPR32INX:$rs1)), (FCVT_L_S_INX $rs1, FRM_DYN)>;
def : Pat<(i64 (any_lround FPR32INX:$rs1)), (FCVT_L_S_INX $rs1, FRM_RMM)>;
def : Pat<(i64 (any_llround FPR32INX:$rs1)), (FCVT_L_S_INX $rs1, FRM_RMM)>;

// [u]int->fp. Match GCC and default to using dynamic rounding mode.
def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_S_W_INX $rs1, FRM_DYN)>;
Expand Down
28 changes: 0 additions & 28 deletions llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
Original file line number Diff line number Diff line change
Expand Up @@ -605,12 +605,6 @@ let Predicates = [HasStdExtZfhmin, NoStdExtZfh] in {
def : Pat<(i32 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>;
def : Pat<(i32 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_WU_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>;

// half->int32 with current rounding mode.
def : Pat<(i32 (any_lrint (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1, FRM_RNE), FRM_DYN)>;

// half->int32 rounded to nearest with ties rounded away from zero.
def : Pat<(i32 (any_lround (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1, FRM_RNE), FRM_RMM)>;

// [u]int->half. Match GCC and default to using dynamic rounding mode.
def : Pat<(f16 (any_sint_to_fp (i32 GPR:$rs1))), (FCVT_H_S (FCVT_S_W $rs1, FRM_DYN), FRM_DYN)>;
def : Pat<(f16 (any_uint_to_fp (i32 GPR:$rs1))), (FCVT_H_S (FCVT_S_WU $rs1, FRM_DYN), FRM_DYN)>;
Expand All @@ -621,12 +615,6 @@ let Predicates = [HasStdExtZhinxmin, NoStdExtZhinx] in {
def : Pat<(i32 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>;
def : Pat<(i32 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_WU_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>;

// half->int32 with current rounding mode.
def : Pat<(i32 (any_lrint FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_DYN)>;

// half->int32 rounded to nearest with ties rounded away from zero.
def : Pat<(i32 (any_lround FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RMM)>;

// [u]int->half. Match GCC and default to using dynamic rounding mode.
def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_W_INX $rs1, FRM_DYN), FRM_DYN)>;
def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_WU_INX $rs1, FRM_DYN), FRM_DYN)>;
Expand All @@ -637,14 +625,6 @@ let Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV64] in {
def : Pat<(i64 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>;
def : Pat<(i64 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_LU_S (FCVT_S_H $rs1, FRM_RNE), FRM_RTZ)>;

// half->int64 with current rounding mode.
def : Pat<(i64 (any_lrint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_DYN)>;
def : Pat<(i64 (any_llrint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_DYN)>;

// half->int64 rounded to nearest with ties rounded away from zero.
def : Pat<(i64 (any_lround (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_RMM)>;
def : Pat<(i64 (any_llround (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1, FRM_RNE), FRM_RMM)>;

// [u]int->fp. Match GCC and default to using dynamic rounding mode.
def : Pat<(f16 (any_sint_to_fp (i64 GPR:$rs1))), (FCVT_H_S (FCVT_S_L $rs1, FRM_DYN), FRM_DYN)>;
def : Pat<(f16 (any_uint_to_fp (i64 GPR:$rs1))), (FCVT_H_S (FCVT_S_LU $rs1, FRM_DYN), FRM_DYN)>;
Expand All @@ -655,14 +635,6 @@ let Predicates = [HasStdExtZhinxmin, NoStdExtZhinx, IsRV64] in {
def : Pat<(i64 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>;
def : Pat<(i64 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_LU_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RTZ)>;

// half->int64 with current rounding mode.
def : Pat<(i64 (any_lrint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_DYN)>;
def : Pat<(i64 (any_llrint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_DYN)>;

// half->int64 rounded to nearest with ties rounded away from zero.
def : Pat<(i64 (any_lround FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RMM)>;
def : Pat<(i64 (any_llround FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1, FRM_RNE), FRM_RMM)>;

// [u]int->fp. Match GCC and default to using dynamic rounding mode.
def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_L_INX $rs1, FRM_DYN), FRM_DYN)>;
def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_LU_INX $rs1, FRM_DYN), FRM_DYN)>;
Expand Down
110 changes: 84 additions & 26 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1061,6 +1061,9 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
DstLT.second.getSizeInBits()))
return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);

// The split cost is handled by the base getCastInstrCost
assert((SrcLT.first == 1) && (DstLT.first == 1) && "Illegal type");

int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");

Expand Down Expand Up @@ -1118,34 +1121,89 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
return Cost;
}
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
// For fp vector to mask, we use:
// vfncvt.rtz.x.f.w v9, v8
// vand.vi v8, v9, 1
// vmsne.vi v0, v8, 0
if (Dst->getScalarSizeInBits() == 1)
return 3;

if (std::abs(PowDiff) <= 1)
return 1;
case ISD::FP_TO_UINT: {
unsigned IsSigned = ISD == ISD::FP_TO_SINT;
unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
unsigned FWCVT =
IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
unsigned FNCVT =
IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
unsigned SrcEltSize = Src->getScalarSizeInBits();
unsigned DstEltSize = Dst->getScalarSizeInBits();
InstructionCost Cost = 0;
if ((SrcEltSize == 16) &&
(!ST->hasVInstructionsF16() || ((DstEltSize / 2) > SrcEltSize))) {
// If the target only supports zvfhmin, or this is an fp16-to-i64 conversion,
// pre-widen to f32 and then convert f32 to integer.
VectorType *VecF32Ty =
VectorType::get(Type::getFloatTy(Dst->getContext()),
cast<VectorType>(Dst)->getElementCount());
std::pair<InstructionCost, MVT> VecF32LT =
getTypeLegalizationCost(VecF32Ty);
Cost +=
VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
VecF32LT.second, CostKind);
Cost += getCastInstrCost(Opcode, Dst, VecF32Ty, CCH, CostKind, I);
return Cost;
}
if (DstEltSize == SrcEltSize)
Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
else if (DstEltSize > SrcEltSize)
Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
else { // (SrcEltSize > DstEltSize)
// First do a narrowing conversion to an integer half the size, then
// truncate if needed.
MVT ElementVT = MVT::getIntegerVT(SrcEltSize / 2);
MVT VecVT = DstLT.second.changeVectorElementType(ElementVT);
Cost += getRISCVInstructionCost(FNCVT, VecVT, CostKind);
if ((SrcEltSize / 2) > DstEltSize) {
Type *VecTy = EVT(VecVT).getTypeForEVT(Dst->getContext());
Cost +=
getCastInstrCost(Instruction::Trunc, Dst, VecTy, CCH, CostKind, I);
}
}
return Cost;
}
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: {
unsigned IsSigned = ISD == ISD::SINT_TO_FP;
unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
unsigned SrcEltSize = Src->getScalarSizeInBits();
unsigned DstEltSize = Dst->getScalarSizeInBits();

// Counts of narrow/widen instructions.
return std::abs(PowDiff);
InstructionCost Cost = 0;
if ((DstEltSize == 16) &&
(!ST->hasVInstructionsF16() || ((SrcEltSize / 2) > DstEltSize))) {
// If the target only supports zvfhmin, or this is an i64-to-fp16 conversion,
// the source is first converted to f32 and then converted to f16.
VectorType *VecF32Ty =
VectorType::get(Type::getFloatTy(Dst->getContext()),
cast<VectorType>(Dst)->getElementCount());
std::pair<InstructionCost, MVT> VecF32LT =
getTypeLegalizationCost(VecF32Ty);
Cost += getCastInstrCost(Opcode, VecF32Ty, Src, CCH, CostKind, I);
Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,
DstLT.second, CostKind);
return Cost;
}

case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
// For mask vector to fp, we should use the following instructions:
// vmv.v.i v8, 0
// vmerge.vim v8, v8, -1, v0
// vfcvt.f.x.v v8, v8
if (Src->getScalarSizeInBits() == 1)
return 3;

if (std::abs(PowDiff) <= 1)
return 1;
// Backend could lower (v[sz]ext i8 to double) to vfcvt(v[sz]ext.f8 i8),
// so it only needs two conversions.
return 2;
if (DstEltSize == SrcEltSize)
Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
else if (DstEltSize > SrcEltSize) {
if ((DstEltSize / 2) > SrcEltSize) {
VectorType *VecTy =
VectorType::get(IntegerType::get(Dst->getContext(), DstEltSize / 2),
cast<VectorType>(Dst)->getElementCount());
unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
Cost += getCastInstrCost(Op, VecTy, Src, CCH, CostKind, I);
}
Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
} else
Cost += getRISCVInstructionCost(FNCVT, DstLT.second, CostKind);
return Cost;
}
}
return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6817,6 +6817,9 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
if (auto *Br = dyn_cast_or_null<BranchInst>(Op)) {
BasicBlock *ThenBB = Br->getSuccessor(0);
BasicBlock *ElseBB = Br->getSuccessor(1);
// Don't consider branches leaving the loop for simplification.
if (!TheLoop->contains(ThenBB) || !TheLoop->contains(ElseBB))
continue;
bool ThenEmpty = IsEmptyBlock(ThenBB);
bool ElseEmpty = IsEmptyBlock(ElseBB);
if ((ThenEmpty && ElseEmpty) ||
Expand Down
Loading

0 comments on commit 36ec6e9

Please sign in to comment.