Updates after review
JonPsson1 committed Jan 10, 2024
1 parent 76bf0f9 commit 27f7015
Showing 7 changed files with 63 additions and 30 deletions.
13 changes: 10 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -343,12 +343,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
if (N->getOpcode() == ISD::ATOMIC_LOAD) {
ISD::LoadExtType ETy = cast<AtomicSDNode>(N)->getExtensionType();
if (ETy == ISD::NON_EXTLOAD) {
- if (TLI.getExtendForAtomicOps() == ISD::SIGN_EXTEND)
+ switch (TLI.getExtendForAtomicOps()) {
+ case ISD::SIGN_EXTEND:
ETy = ISD::SEXTLOAD;
- else if (TLI.getExtendForAtomicOps() == ISD::ZERO_EXTEND)
+ break;
+ case ISD::ZERO_EXTEND:
ETy = ISD::ZEXTLOAD;
- else
+ break;
+ case ISD::ANY_EXTEND:
ETy = ISD::EXTLOAD;
+ break;
+ default:
+ llvm_unreachable("Invalid atomic op extension");
+ }
}
cast<AtomicSDNode>(Res)->setExtensionType(ETy);
}
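
This hunk makes PromoteIntRes_Atomic0 tag the promoted ATOMIC_LOAD with the extension kind matching the target's getExtendForAtomicOps(), and asserts on anything else. A minimal IR sketch of the situation, illustrative only and not taken from the commit (the function name is made up): on a target whose getExtendForAtomicOps() returns ISD::SIGN_EXTEND the promoted load would be tagged SEXTLOAD, ZERO_EXTEND maps to ZEXTLOAD, and ANY_EXTEND to EXTLOAD.

; Illustrative reproducer (not from this commit): an i8 atomic load that the
; type legalizer promotes to a wider integer type.
define i32 @promoted_atomic_load(ptr %p) {
  %v = load atomic i8, ptr %p seq_cst, align 1   ; promoted ATOMIC_LOAD gets an extension type
  %e = sext i8 %v to i32
  ret i32 %e
}
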
7 changes: 2 additions & 5 deletions llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -1516,13 +1516,10 @@ bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const {
MachineMemOperand *MMO = MemAccess->getMemOperand();
assert(MMO && "Expected a memory operand.");

- // These instructions are not atomic.
- if (MMO->isAtomic())
- return false;
-
// The memory access must have a proper alignment and no index register.
+ // ATOMIC_LOADs do not have the offset operand.
if (MemAccess->getAlign().value() < StoreSize ||
- !MemAccess->getOffset().isUndef())
+ (!MMO->isAtomic() && !MemAccess->getOffset().isUndef()))
return false;

// The MMO must not have an unaligned offset.
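
With this change storeLoadIsAligned() no longer rejects atomic accesses up front: alignment is still required, but the offset-operand check is skipped for ATOMIC_LOADs, which do not have that operand. Illustrative only, not from the commit (the function name is hypothetical): a naturally aligned atomic load of the sort the predicate can now accept.

define i64 @aligned_atomic_load(ptr %p) {
  ; align 8 satisfies the StoreSize check; an ATOMIC_LOAD has no offset operand to test
  %v = load atomic i64, ptr %p seq_cst, align 8
  ret i64 %v
}
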
15 changes: 7 additions & 8 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -4507,7 +4507,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}

- SDValue SystemZTargetLowering::lowerATOMIC_I128_LDST(SDValue Op,
+ SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
SelectionDAG &DAG) const {
auto *Node = cast<AtomicSDNode>(Op.getNode());
assert(Node->getMemoryVT() == MVT::i128 && "Only custom lowering i128.");
@@ -5637,12 +5637,11 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
return GS.getNode(DAG, SDLoc(BVN));
}

- bool SystemZTargetLowering::isVectorElementLoad(SDValue Op, EVT VecVT) const {
+ bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
return true;
if (auto *AL = dyn_cast<AtomicSDNode>(Op))
- if (AL->getOpcode() == ISD::ATOMIC_LOAD && SDValue(AL, 0).hasOneUse() &&
- AL->getMemoryVT() == VecVT.getScalarType())
+ if (AL->getOpcode() == ISD::ATOMIC_LOAD)
return true;
if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
return true;
@@ -5681,13 +5680,13 @@ SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// we would need 2 instructions to replicate it: VLVGP followed by VREPx.
// This is only a win if the single defined element is used more than once.
// In other cases we're better off using a single VLVGx.
- if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single, VT)))
+ if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);

// If all elements are loads, use VLREP/VLEs (below).
bool AllLoads = true;
for (auto Elem : Elems)
- if (!isVectorElementLoad(Elem, VT)) {
+ if (!isVectorElementLoad(Elem)) {
AllLoads = false;
break;
}
@@ -5759,7 +5758,7 @@ SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
std::map<const SDNode*, unsigned> UseCounts;
SDNode *LoadMaxUses = nullptr;
for (unsigned I = 0; I < NumElements; ++I)
- if (isVectorElementLoad(Elems[I], VT)) {
+ if (isVectorElementLoad(Elems[I])) {
SDNode *Ld = Elems[I].getNode();
UseCounts[Ld]++;
if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
@@ -6122,7 +6121,7 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
case ISD::ATOMIC_STORE:
case ISD::ATOMIC_LOAD:
- return lowerATOMIC_I128_LDST(Op, DAG);
+ return lowerATOMIC_LDST_I128(Op, DAG);
case ISD::ATOMIC_LOAD_ADD:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
case ISD::ATOMIC_LOAD_SUB:
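
Besides the rename to lowerATOMIC_LDST_I128, this file relaxes isVectorElementLoad(): any ATOMIC_LOAD now counts as a vector-element load when building vectors, and the one-use and element-type restrictions are dropped here (the new f25_c/f25_d tests below cover the extending-load and extra-use cases). A sketch of the splat-from-atomic-load pattern this predicate feeds into, illustrative only and not from the commit; whether VLREP is actually selected still depends on the other checks in buildVector.

define <4 x i32> @replicate_atomic(ptr %p) {
  ; single, non-extending atomic element load replicated into all lanes
  %v = load atomic i32, ptr %p seq_cst, align 4
  %ins = insertelement <4 x i32> undef, i32 %v, i32 0
  %rep = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %rep
}
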
4 changes: 2 additions & 2 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -694,7 +694,7 @@ class SystemZTargetLowering : public TargetLowering {
SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerATOMIC_I128_LDST(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerATOMIC_LDST_I128(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
unsigned Opcode) const;
SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
@@ -704,7 +704,7 @@
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- bool isVectorElementLoad(SDValue Op, EVT VecVT) const;
+ bool isVectorElementLoad(SDValue Op) const;
SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
SmallVectorImpl<SDValue> &Elems) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
8 changes: 4 additions & 4 deletions llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -504,17 +504,17 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>;
def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>;

- defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, nonatomic_ld, 4>;
- defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, nonatomic_ld, 8>;
+ defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>;
+ defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>;
}

// Fused multiply-subtract.
let Uses = [FPC], mayRaiseFPException = 1 in {
def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>;
def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>;

- defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, nonatomic_ld, 4>;
- defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, nonatomic_ld, 8>;
+ defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>;
+ defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>;
}

// Division.
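
The MAEB/MADB and MSEB/MSDB patterns now use the plain load fragment, so an atomic FP load can fold into the fused multiply-add/subtract memory forms (the nonatomic_ld PatFrag they used is deleted in the next file), as the updated f15 test shows for f32. A hypothetical f64 analogue, illustrative only and not part of the commit; the expectation, not verified here, is that madb can now fold the atomic load.

declare double @llvm.fma.f64(double, double, double)

define double @fold_atomic_into_madb(double %f1, ptr %ptr, double %acc) {
  ; seq_cst atomic load used directly as an FMA multiplicand
  %f2 = load atomic double, ptr %ptr seq_cst, align 8
  %res = call double @llvm.fma.f64(double %f1, double %f2, double %acc)
  ret double %res
}
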
4 changes: 0 additions & 4 deletions llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -607,10 +607,6 @@ def nonvolatile_anyextloadi8 : NonvolatileLoad<anyextloadi8>;
def nonvolatile_anyextloadi16 : NonvolatileLoad<anyextloadi16>;
def nonvolatile_anyextloadi32 : NonvolatileLoad<anyextloadi32>;

- def nonatomic_ld : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return !cast<LoadSDNode>(N)->isAtomic();
- }]>;
-
// Non-volatile stores.
class NonvolatileStore<SDPatternOperator store>
: PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{
42 changes: 38 additions & 4 deletions llvm/test/CodeGen/SystemZ/atomic-memofolds.ll
@@ -170,12 +170,11 @@ define i64 @f14(i64 %a, ptr %src) {
ret i64 %sub
}

- ; Check that maeb (reg/mem) is *not* used for an atomic load.
define float @f15(float %f1, ptr %ptr, float %acc) {
; CHECK-LABEL: f15:
; CHECK: # %bb.0:
- ; CHECK-NEXT: lde %f1, 0(%r2)
- ; CHECK-NEXT: wfmasb %f0, %f0, %f1, %f2
+ ; CHECK-NEXT: maeb %f2, %f0, 0(%r2)
+ ; CHECK-NEXT: ldr %f0, %f2
; CHECK-NEXT: br %r14
%f2 = load atomic float, ptr %ptr seq_cst, align 4
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
@@ -387,6 +386,39 @@ define void @f25_b(ptr %src, ptr %dst) {
ret void
}

+ ; Do *not* use vlrep for an extending load.
+ define <4 x i32> @f25_c(ptr %ptr) {
+ ; CHECK-LABEL: f25_c:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: lb %r0, 0(%r2)
+ ; CHECK-NEXT: vlvgp %v0, %r0, %r0
+ ; CHECK-NEXT: vrepf %v24, %v0, 1
+ ; CHECK-NEXT: br %r14
+ %L = load atomic i8, ptr %ptr seq_cst, align 4
+ %S = sext i8 %L to i32
+ %val = insertelement <4 x i32> undef, i32 %S, i32 0
+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+ }
+
+ ; Do *not* use vlrep if there is another scalar use.
+ define <4 x i32> @f25_d(ptr %ptr, ptr %dst) {
+ ; CHECK-LABEL: f25_d:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: l %r0, 0(%r2)
+ ; CHECK-NEXT: vlvgp %v0, %r0, %r0
+ ; CHECK-NEXT: vrepf %v24, %v0, 1
+ ; CHECK-NEXT: st %r0, 0(%r3)
+ ; CHECK-NEXT: br %r14
+ %L = load atomic i32, ptr %ptr seq_cst, align 4
+ store i32 %L, ptr %dst, align 4
+ %val = insertelement <4 x i32> undef, i32 %L, i32 0
+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+ }

define void @f26(ptr %src, ptr %dst) {
; CHECK-LABEL: f26:
; CHECK: # %bb.0:
@@ -412,6 +444,8 @@ define void @f26_b(ptr %src, ptr %dst) {
ret void
}



; Vector Load logical element and zero.
define <16 x i8> @f27(ptr %ptr) {
; CHECK-LABEL: f27:
@@ -607,7 +641,7 @@ define void @f43(ptr %ptr) {
define void @f44(ptr %ptr) {
; CHECK-LABEL: f44:
; CHECK: # %bb.0:
- ; CHECK-NEXT: larl %r1, .LCPI48_0
+ ; CHECK-NEXT: larl %r1, .LCPI50_0
; CHECK-NEXT: ld %f0, 0(%r1)
; CHECK-NEXT: std %f0, 0(%r2)
; CHECK-NEXT: bcr 14, %r0