diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index c411c8ef9528d7..c979c03dc1b835 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -6100,9 +6100,6 @@ NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { if (AI->isFloatingPointOperation()) { if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) { - if (Ty->isHalfTy() && STI.getSmVersion() >= 70 && - STI.getPTXVersion() >= 63) - return AtomicExpansionKind::None; if (Ty->isFloatTy()) return AtomicExpansionKind::None; if (Ty->isDoubleTy() && STI.hasAtomAddF64()) diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 869b13369e87e1..477789a164ead2 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1630,13 +1630,6 @@ defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2; -defm INT_PTX_ATOM_ADD_G_F16 : F_ATOMIC_2, hasPTX<63>]>; -defm INT_PTX_ATOM_ADD_S_F16 : F_ATOMIC_2, hasPTX<63>]>; -defm INT_PTX_ATOM_ADD_GEN_F16 : F_ATOMIC_2, hasPTX<63>]>; - defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2; defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2 Preds> { let AddedComplexity = 1 in { - def : ATOM23_impl; def : ATOM23_impl; @@ -2027,9 +2017,6 @@ multiclass ATOM2P_impl; def : ATOM23_impl; @@ -2149,8 +2136,6 @@ multiclass ATOM2_add_impl { defm _s32 : ATOM2S_impl; defm _u32 : ATOM2S_impl; defm _u64 : ATOM2S_impl; - defm _f16 : ATOM2S_impl, hasPTX<63>]>; defm _f32 : ATOM2S_impl; defm _f64 : ATOM2S_impl; -; CHECK-NEXT: .reg .b32 %r<4>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.u32 %r1, [test_param_0]; -; CHECK-NEXT: ld.param.b16 %rs1, [test_param_3]; -; CHECK-NEXT: atom.add.noftz.f16 %rs2, [%r1], %rs1; -; CHECK-NEXT: ld.param.u32 %r2, [test_param_1]; -; CHECK-NEXT: atom.global.add.noftz.f16 %rs3, [%r2], %rs1; -; CHECK-NEXT: ld.param.u32 %r3, [test_param_2]; -; CHECK-NEXT: atom.shared.add.noftz.f16 %rs4, [%r3], %rs1; -; CHECK-NEXT: ret; -; -; CHECK64-LABEL: test( -; CHECK64: { -; CHECK64-NEXT: .reg .b16 %rs<5>; -; CHECK64-NEXT: .reg .b64 %rd<4>; -; CHECK64-EMPTY: -; CHECK64-NEXT: // %bb.0: -; CHECK64-NEXT: ld.param.u64 %rd1, [test_param_0]; -; CHECK64-NEXT: ld.param.b16 %rs1, [test_param_3]; -; CHECK64-NEXT: atom.add.noftz.f16 %rs2, [%rd1], %rs1; -; CHECK64-NEXT: ld.param.u64 %rd2, [test_param_1]; -; CHECK64-NEXT: atom.global.add.noftz.f16 %rs3, [%rd2], %rs1; -; CHECK64-NEXT: ld.param.u64 %rd3, [test_param_2]; -; CHECK64-NEXT: atom.shared.add.noftz.f16 %rs4, [%rd3], %rs1; -; CHECK64-NEXT: ret; -; -; CHECKPTX62-LABEL: test( -; CHECKPTX62: { -; CHECKPTX62-NEXT: .reg .pred %p<4>; -; CHECKPTX62-NEXT: .reg .b16 %rs<14>; -; CHECKPTX62-NEXT: .reg .b32 %r<49>; -; CHECKPTX62-EMPTY: -; CHECKPTX62-NEXT: // %bb.0: -; CHECKPTX62-NEXT: ld.param.b16 %rs1, [test_param_3]; -; CHECKPTX62-NEXT: ld.param.u32 %r20, [test_param_2]; -; CHECKPTX62-NEXT: ld.param.u32 %r19, [test_param_1]; -; CHECKPTX62-NEXT: ld.param.u32 %r21, [test_param_0]; -; CHECKPTX62-NEXT: and.b32 %r1, %r21, -4; -; CHECKPTX62-NEXT: and.b32 %r22, %r21, 3; -; CHECKPTX62-NEXT: shl.b32 %r2, %r22, 3; -; CHECKPTX62-NEXT: mov.b32 %r23, 65535; -; CHECKPTX62-NEXT: shl.b32 %r24, %r23, %r2; -; CHECKPTX62-NEXT: not.b32 %r3, %r24; -; CHECKPTX62-NEXT: ld.u32 %r46, [%r1]; -; CHECKPTX62-NEXT: $L__BB0_1: // %atomicrmw.start -; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECKPTX62-NEXT: shr.u32 %r25, %r46, %r2; -; CHECKPTX62-NEXT: cvt.u16.u32 %rs2, %r25; -; CHECKPTX62-NEXT: add.rn.f16 %rs4, %rs2, %rs1; -; CHECKPTX62-NEXT: cvt.u32.u16 %r26, %rs4; -; CHECKPTX62-NEXT: shl.b32 %r27, %r26, %r2; -; CHECKPTX62-NEXT: and.b32 %r28, %r46, %r3; -; CHECKPTX62-NEXT: or.b32 %r29, %r28, %r27; -; CHECKPTX62-NEXT: atom.cas.b32 %r6, [%r1], %r46, %r29; -; CHECKPTX62-NEXT: setp.ne.s32 %p1, %r6, %r46; -; CHECKPTX62-NEXT: mov.u32 %r46, %r6; -; CHECKPTX62-NEXT: @%p1 bra $L__BB0_1; -; CHECKPTX62-NEXT: // %bb.2: // %atomicrmw.end -; CHECKPTX62-NEXT: and.b32 %r7, %r19, -4; -; CHECKPTX62-NEXT: shl.b32 %r30, %r19, 3; -; CHECKPTX62-NEXT: and.b32 %r8, %r30, 24; -; CHECKPTX62-NEXT: shl.b32 %r32, %r23, %r8; -; CHECKPTX62-NEXT: not.b32 %r9, %r32; -; CHECKPTX62-NEXT: ld.global.u32 %r47, [%r7]; -; CHECKPTX62-NEXT: $L__BB0_3: // %atomicrmw.start9 -; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECKPTX62-NEXT: shr.u32 %r33, %r47, %r8; -; CHECKPTX62-NEXT: cvt.u16.u32 %rs6, %r33; -; CHECKPTX62-NEXT: add.rn.f16 %rs8, %rs6, %rs1; -; CHECKPTX62-NEXT: cvt.u32.u16 %r34, %rs8; -; CHECKPTX62-NEXT: shl.b32 %r35, %r34, %r8; -; CHECKPTX62-NEXT: and.b32 %r36, %r47, %r9; -; CHECKPTX62-NEXT: or.b32 %r37, %r36, %r35; -; CHECKPTX62-NEXT: atom.global.cas.b32 %r12, [%r7], %r47, %r37; -; CHECKPTX62-NEXT: setp.ne.s32 %p2, %r12, %r47; -; CHECKPTX62-NEXT: mov.u32 %r47, %r12; -; CHECKPTX62-NEXT: @%p2 bra $L__BB0_3; -; CHECKPTX62-NEXT: // %bb.4: // %atomicrmw.end8 -; CHECKPTX62-NEXT: and.b32 %r13, %r20, -4; -; CHECKPTX62-NEXT: shl.b32 %r38, %r20, 3; -; CHECKPTX62-NEXT: and.b32 %r14, %r38, 24; -; CHECKPTX62-NEXT: shl.b32 %r40, %r23, %r14; -; CHECKPTX62-NEXT: not.b32 %r15, %r40; -; CHECKPTX62-NEXT: ld.shared.u32 %r48, [%r13]; -; CHECKPTX62-NEXT: $L__BB0_5: // %atomicrmw.start27 -; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECKPTX62-NEXT: shr.u32 %r41, %r48, %r14; -; CHECKPTX62-NEXT: cvt.u16.u32 %rs10, %r41; -; CHECKPTX62-NEXT: add.rn.f16 %rs12, %rs10, %rs1; -; CHECKPTX62-NEXT: cvt.u32.u16 %r42, %rs12; -; CHECKPTX62-NEXT: shl.b32 %r43, %r42, %r14; -; CHECKPTX62-NEXT: and.b32 %r44, %r48, %r15; -; CHECKPTX62-NEXT: or.b32 %r45, %r44, %r43; -; CHECKPTX62-NEXT: atom.shared.cas.b32 %r18, [%r13], %r48, %r45; -; CHECKPTX62-NEXT: setp.ne.s32 %p3, %r18, %r48; -; CHECKPTX62-NEXT: mov.u32 %r48, %r18; -; CHECKPTX62-NEXT: @%p3 bra $L__BB0_5; -; CHECKPTX62-NEXT: // %bb.6: // %atomicrmw.end26 -; CHECKPTX62-NEXT: ret; - %r1 = atomicrmw fadd ptr %dp0, half %val seq_cst - %r2 = atomicrmw fadd ptr addrspace(1) %dp1, half %val seq_cst - %ret = atomicrmw fadd ptr addrspace(3) %dp3, half %val seq_cst - ret void -} - -attributes #1 = { argmemonly nounwind } diff --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll index 6f2b5dcf47f13b..e99d0fd05e346b 100644 --- a/llvm/test/CodeGen/NVPTX/atomics.ll +++ b/llvm/test/CodeGen/NVPTX/atomics.ll @@ -175,13 +175,6 @@ define float @atomicrmw_add_f32_generic(ptr %addr, float %val) { ret float %ret } -; CHECK-LABEL: atomicrmw_add_f16_generic -define half @atomicrmw_add_f16_generic(ptr %addr, half %val) { -; CHECK: atom.cas - %ret = atomicrmw fadd ptr %addr, half %val seq_cst - ret half %ret -} - ; CHECK-LABEL: atomicrmw_add_f32_addrspace1 define float @atomicrmw_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) { ; CHECK: atom.global.add.f32