From 6977646a25c8087ec1817cd8146492b9c8bdb22a Mon Sep 17 00:00:00 2001 From: Gavin Zhao Date: Fri, 5 Jan 2024 09:05:01 -0500 Subject: [PATCH] pytorch: update to v2.1.2 Changelog available [here](https://github.com/pytorch/pytorch/releases/tag/v2.1.2). Test plan: Ran llamaindex and Fooocus on `gfx1030`. Signed-off-by: Gavin Zhao --- packages/py/pytorch/abi_symbols | 12 +- packages/py/pytorch/abi_used_libs | 7 +- packages/py/pytorch/abi_used_symbols | 564 ++++++++---------- .../files/enable-hipsparse-descriptors.patch | 116 ++++ .../fully-disable-rocm-kernel-asserts.patch | 303 ++++++++++ packages/py/pytorch/files/rocm-6.0.patch | 57 ++ packages/py/pytorch/package.yml | 55 +- packages/py/pytorch/pspec_x86_64.xml | 26 +- 8 files changed, 783 insertions(+), 357 deletions(-) create mode 100644 packages/py/pytorch/files/enable-hipsparse-descriptors.patch create mode 100644 packages/py/pytorch/files/fully-disable-rocm-kernel-asserts.patch create mode 100644 packages/py/pytorch/files/rocm-6.0.patch diff --git a/packages/py/pytorch/abi_symbols b/packages/py/pytorch/abi_symbols index f313a3c0debc..b7593d0e33a7 100644 --- a/packages/py/pytorch/abi_symbols +++ b/packages/py/pytorch/abi_symbols @@ -42506,10 +42506,10 @@ libtorch_hip.so:_ZN2at4cuda4blas12getrsBatchedIN3c107complexIfEEEEvPv18hipblasOp libtorch_hip.so:_ZN2at4cuda4blas12getrsBatchedIdEEvPv18hipblasOperation_tiiPPT_iPiS7_iS8_i libtorch_hip.so:_ZN2at4cuda4blas12getrsBatchedIfEEvPv18hipblasOperation_tiiPPT_iPiS7_iS8_i libtorch_hip.so:_ZN2at4cuda4blas19_cublasGetErrorEnumE15hipblasStatus_t -libtorch_hip.so:_ZN2at4cuda4blas4trsmIN3c107complexIdEEEEvPv17hipblasSideMode_t17hipblasFillMode_t18hipblasOperation_t17hipblasDiagType_tiiPKT_PSB_iSE_i -libtorch_hip.so:_ZN2at4cuda4blas4trsmIN3c107complexIfEEEEvPv17hipblasSideMode_t17hipblasFillMode_t18hipblasOperation_t17hipblasDiagType_tiiPKT_PSB_iSE_i 
-libtorch_hip.so:_ZN2at4cuda4blas4trsmIdEEvPv17hipblasSideMode_t17hipblasFillMode_t18hipblasOperation_t17hipblasDiagType_tiiPKT_PS8_iSB_i -libtorch_hip.so:_ZN2at4cuda4blas4trsmIfEEvPv17hipblasSideMode_t17hipblasFillMode_t18hipblasOperation_t17hipblasDiagType_tiiPKT_PS8_iSB_i +libtorch_hip.so:_ZN2at4cuda4blas4trsmIN3c107complexIdEEEEvPv17hipblasSideMode_t17hipblasFillMode_t18hipblasOperation_t17hipblasDiagType_tiiPKT_SD_iPSB_i +libtorch_hip.so:_ZN2at4cuda4blas4trsmIN3c107complexIfEEEEvPv17hipblasSideMode_t17hipblasFillMode_t18hipblasOperation_t17hipblasDiagType_tiiPKT_SD_iPSB_i +libtorch_hip.so:_ZN2at4cuda4blas4trsmIdEEvPv17hipblasSideMode_t17hipblasFillMode_t18hipblasOperation_t17hipblasDiagType_tiiPKT_SA_iPS8_i +libtorch_hip.so:_ZN2at4cuda4blas4trsmIfEEvPv17hipblasSideMode_t17hipblasFillMode_t18hipblasOperation_t17hipblasDiagType_tiiPKT_SA_iPS8_i libtorch_hip.so:_ZN2at4cuda4ceilERKNS_6TensorE libtorch_hip.so:_ZN2at4cuda4cos_ERNS_6TensorE libtorch_hip.so:_ZN2at4cuda4coshERKNS_6TensorE @@ -42683,8 +42683,6 @@ libtorch_hip.so:_ZN2at4cuda6renormERKNS_6TensorERKN3c106ScalarElS7_ libtorch_hip.so:_ZN2at4cuda6round_ERNS_6TensorE libtorch_hip.so:_ZN2at4cuda6round_ERNS_6TensorEl libtorch_hip.so:_ZN2at4cuda6rsqrt_ERNS_6TensorE -libtorch_hip.so:_ZN2at4cuda6sparse23CuSparseDnMatDescriptorC1ERKNS_6TensorEl -libtorch_hip.so:_ZN2at4cuda6sparse23CuSparseDnMatDescriptorC2ERKNS_6TensorEl libtorch_hip.so:_ZN2at4cuda6sparse23CuSparseDnVecDescriptorC1ERKNS_6TensorE libtorch_hip.so:_ZN2at4cuda6sparse23CuSparseDnVecDescriptorC2ERKNS_6TensorE libtorch_hip.so:_ZN2at4cuda6sparse26CuSparseSpMatCsrDescriptorC1ERKNS_6TensorEl @@ -43944,7 +43942,7 @@ libtorch_hip.so:_ZN5torch3jit5fuser4cuda15FusedKernelCUDAC2EaNSt7__cxx1112basic_ libtorch_hip.so:_ZN5torch3jit5fuser4cuda15FusedKernelCUDAD0Ev libtorch_hip.so:_ZN5torch3jit5fuser4cuda15FusedKernelCUDAD1Ev libtorch_hip.so:_ZN5torch3jit5fuser4cuda15FusedKernelCUDAD2Ev 
-libtorch_hip.so:_ZN5torch3jit5fuser4cuda18codegenOutputQueryEPK15hipDeviceProp_tRiS6_Rb +libtorch_hip.so:_ZN5torch3jit5fuser4cuda18codegenOutputQueryEPK20hipDeviceProp_tR0600RiS6_Rb libtorch_hip.so:_ZN5torch4cuda10gather_outEN3c108ArrayRefIN2at6TensorEEERS4_l libtorch_hip.so:_ZN5torch4cuda11scatter_outERKN2at6TensorERSt6vectorIS2_SaIS2_EElRKN3c108optionalIS5_INSA_INS9_3hip27HIPStreamMasqueradingAsCUDAEEESaISD_EEEE libtorch_hip.so:_ZN5torch4cuda13broadcast_outERKN2at6TensorERSt6vectorIS2_SaIS2_EE diff --git a/packages/py/pytorch/abi_used_libs b/packages/py/pytorch/abi_used_libs index b7ad7ae52b97..ddc80a1ad49a 100644 --- a/packages/py/pytorch/abi_used_libs +++ b/packages/py/pytorch/abi_used_libs @@ -1,7 +1,7 @@ UNKNOWN ld-linux-x86-64.so.2 libMIOpen.so.1 -libamdhip64.so.5 +libamdhip64.so.6 libavcodec.so.58 libavformat.so.58 libavutil.so.56 @@ -9,17 +9,16 @@ libc.so.6 libgcc_s.so.1 libgflags.so.2.2 libglog.so.0 -libhipblas.so.0 +libhipblas.so.2 libhipfft.so.0 libhiprand.so.1 libhipsolver.so.0 -libhipsparse.so.0 +libhipsparse.so.1 libhiredis.so.1.0.0 liblapack.so.3 libleveldb.so.1 liblmdb.so libm.so.6 -libmagma.so.2.7 libmpi.so.40 libnuma.so.1 libomp.so diff --git a/packages/py/pytorch/abi_used_symbols b/packages/py/pytorch/abi_used_symbols index 3783dd993716..b8978f1cfa50 100644 --- a/packages/py/pytorch/abi_used_symbols +++ b/packages/py/pytorch/abi_used_symbols @@ -442,94 +442,94 @@ libMIOpen.so.1:miopenSetPoolingIndexType libMIOpen.so.1:miopenSetRNNDescriptor libMIOpen.so.1:miopenSetStream libMIOpen.so.1:miopenSetTensorDescriptor -libamdhip64.so.5:__hipPopCallConfiguration -libamdhip64.so.5:__hipPushCallConfiguration -libamdhip64.so.5:__hipRegisterFatBinary -libamdhip64.so.5:__hipRegisterFunction -libamdhip64.so.5:__hipUnregisterFatBinary -libamdhip64.so.5:hipCtxGetCurrent -libamdhip64.so.5:hipDeviceCanAccessPeer -libamdhip64.so.5:hipDeviceEnablePeerAccess -libamdhip64.so.5:hipDeviceGetPCIBusId -libamdhip64.so.5:hipDeviceGetStreamPriorityRange 
-libamdhip64.so.5:hipDevicePrimaryCtxGetState -libamdhip64.so.5:hipDeviceSynchronize -libamdhip64.so.5:hipDriverGetVersion -libamdhip64.so.5:hipEventCreate -libamdhip64.so.5:hipEventCreateWithFlags -libamdhip64.so.5:hipEventDestroy -libamdhip64.so.5:hipEventElapsedTime -libamdhip64.so.5:hipEventQuery -libamdhip64.so.5:hipEventRecord -libamdhip64.so.5:hipEventSynchronize -libamdhip64.so.5:hipFree -libamdhip64.so.5:hipGetDevice -libamdhip64.so.5:hipGetDeviceCount -libamdhip64.so.5:hipGetDeviceProperties -libamdhip64.so.5:hipGetErrorName -libamdhip64.so.5:hipGetErrorString -libamdhip64.so.5:hipGetLastError -libamdhip64.so.5:hipGetStreamDeviceId -libamdhip64.so.5:hipGraphDestroy -libamdhip64.so.5:hipGraphExecDestroy -libamdhip64.so.5:hipGraphGetNodes -libamdhip64.so.5:hipGraphInstantiate -libamdhip64.so.5:hipGraphLaunch -libamdhip64.so.5:hipHostFree -libamdhip64.so.5:hipHostMalloc -libamdhip64.so.5:hipHostRegister -libamdhip64.so.5:hipHostUnregister -libamdhip64.so.5:hipIpcCloseMemHandle -libamdhip64.so.5:hipIpcGetEventHandle -libamdhip64.so.5:hipIpcGetMemHandle -libamdhip64.so.5:hipIpcOpenEventHandle -libamdhip64.so.5:hipIpcOpenMemHandle -libamdhip64.so.5:hipKernelNameRef -libamdhip64.so.5:hipKernelNameRefByPtr -libamdhip64.so.5:hipLaunchKernel -libamdhip64.so.5:hipMalloc -libamdhip64.so.5:hipMemGetInfo -libamdhip64.so.5:hipMemcpy -libamdhip64.so.5:hipMemcpy2DAsync -libamdhip64.so.5:hipMemcpyAsync -libamdhip64.so.5:hipMemcpyPeerAsync -libamdhip64.so.5:hipMemcpyWithStream -libamdhip64.so.5:hipMemsetAsync -libamdhip64.so.5:hipModuleGetFunction -libamdhip64.so.5:hipModuleLaunchKernel -libamdhip64.so.5:hipModuleLoadData -libamdhip64.so.5:hipModuleOccupancyMaxActiveBlocksPerMultiprocessor -libamdhip64.so.5:hipModuleUnload -libamdhip64.so.5:hipOccupancyMaxActiveBlocksPerMultiprocessor -libamdhip64.so.5:hipOccupancyMaxPotentialBlockSize -libamdhip64.so.5:hipPeekAtLastError -libamdhip64.so.5:hipPointerGetAttributes -libamdhip64.so.5:hipRuntimeGetVersion 
-libamdhip64.so.5:hipSetDevice -libamdhip64.so.5:hipStreamBeginCapture -libamdhip64.so.5:hipStreamCreate -libamdhip64.so.5:hipStreamCreateWithFlags -libamdhip64.so.5:hipStreamCreateWithPriority -libamdhip64.so.5:hipStreamDestroy -libamdhip64.so.5:hipStreamEndCapture -libamdhip64.so.5:hipStreamGetCaptureInfo -libamdhip64.so.5:hipStreamGetPriority -libamdhip64.so.5:hipStreamIsCapturing -libamdhip64.so.5:hipStreamQuery -libamdhip64.so.5:hipStreamSynchronize -libamdhip64.so.5:hipStreamWaitEvent -libamdhip64.so.5:hipThreadExchangeStreamCaptureMode -libamdhip64.so.5:hiprtcAddNameExpression -libamdhip64.so.5:hiprtcCompileProgram -libamdhip64.so.5:hiprtcCreateProgram -libamdhip64.so.5:hiprtcDestroyProgram -libamdhip64.so.5:hiprtcGetCode -libamdhip64.so.5:hiprtcGetCodeSize -libamdhip64.so.5:hiprtcGetErrorString -libamdhip64.so.5:hiprtcGetLoweredName -libamdhip64.so.5:hiprtcGetProgramLog -libamdhip64.so.5:hiprtcGetProgramLogSize -libamdhip64.so.5:hiprtcVersion +libamdhip64.so.6:__hipPopCallConfiguration +libamdhip64.so.6:__hipPushCallConfiguration +libamdhip64.so.6:__hipRegisterFatBinary +libamdhip64.so.6:__hipRegisterFunction +libamdhip64.so.6:__hipUnregisterFatBinary +libamdhip64.so.6:hipCtxGetCurrent +libamdhip64.so.6:hipDeviceCanAccessPeer +libamdhip64.so.6:hipDeviceEnablePeerAccess +libamdhip64.so.6:hipDeviceGetPCIBusId +libamdhip64.so.6:hipDeviceGetStreamPriorityRange +libamdhip64.so.6:hipDevicePrimaryCtxGetState +libamdhip64.so.6:hipDeviceSynchronize +libamdhip64.so.6:hipDriverGetVersion +libamdhip64.so.6:hipEventCreate +libamdhip64.so.6:hipEventCreateWithFlags +libamdhip64.so.6:hipEventDestroy +libamdhip64.so.6:hipEventElapsedTime +libamdhip64.so.6:hipEventQuery +libamdhip64.so.6:hipEventRecord +libamdhip64.so.6:hipEventSynchronize +libamdhip64.so.6:hipFree +libamdhip64.so.6:hipGetDevice +libamdhip64.so.6:hipGetDeviceCount +libamdhip64.so.6:hipGetDevicePropertiesR0600 +libamdhip64.so.6:hipGetErrorName +libamdhip64.so.6:hipGetErrorString 
+libamdhip64.so.6:hipGetLastError +libamdhip64.so.6:hipGetStreamDeviceId +libamdhip64.so.6:hipGraphDestroy +libamdhip64.so.6:hipGraphExecDestroy +libamdhip64.so.6:hipGraphGetNodes +libamdhip64.so.6:hipGraphInstantiate +libamdhip64.so.6:hipGraphLaunch +libamdhip64.so.6:hipHostFree +libamdhip64.so.6:hipHostMalloc +libamdhip64.so.6:hipHostRegister +libamdhip64.so.6:hipHostUnregister +libamdhip64.so.6:hipIpcCloseMemHandle +libamdhip64.so.6:hipIpcGetEventHandle +libamdhip64.so.6:hipIpcGetMemHandle +libamdhip64.so.6:hipIpcOpenEventHandle +libamdhip64.so.6:hipIpcOpenMemHandle +libamdhip64.so.6:hipKernelNameRef +libamdhip64.so.6:hipKernelNameRefByPtr +libamdhip64.so.6:hipLaunchKernel +libamdhip64.so.6:hipMalloc +libamdhip64.so.6:hipMemGetInfo +libamdhip64.so.6:hipMemcpy +libamdhip64.so.6:hipMemcpy2DAsync +libamdhip64.so.6:hipMemcpyAsync +libamdhip64.so.6:hipMemcpyPeerAsync +libamdhip64.so.6:hipMemcpyWithStream +libamdhip64.so.6:hipMemsetAsync +libamdhip64.so.6:hipModuleGetFunction +libamdhip64.so.6:hipModuleLaunchKernel +libamdhip64.so.6:hipModuleLoadData +libamdhip64.so.6:hipModuleOccupancyMaxActiveBlocksPerMultiprocessor +libamdhip64.so.6:hipModuleUnload +libamdhip64.so.6:hipOccupancyMaxActiveBlocksPerMultiprocessor +libamdhip64.so.6:hipOccupancyMaxPotentialBlockSize +libamdhip64.so.6:hipPeekAtLastError +libamdhip64.so.6:hipPointerGetAttributes +libamdhip64.so.6:hipRuntimeGetVersion +libamdhip64.so.6:hipSetDevice +libamdhip64.so.6:hipStreamBeginCapture +libamdhip64.so.6:hipStreamCreate +libamdhip64.so.6:hipStreamCreateWithFlags +libamdhip64.so.6:hipStreamCreateWithPriority +libamdhip64.so.6:hipStreamDestroy +libamdhip64.so.6:hipStreamEndCapture +libamdhip64.so.6:hipStreamGetCaptureInfo +libamdhip64.so.6:hipStreamGetPriority +libamdhip64.so.6:hipStreamIsCapturing +libamdhip64.so.6:hipStreamQuery +libamdhip64.so.6:hipStreamSynchronize +libamdhip64.so.6:hipStreamWaitEvent +libamdhip64.so.6:hipThreadExchangeStreamCaptureMode +libamdhip64.so.6:hiprtcAddNameExpression 
+libamdhip64.so.6:hiprtcCompileProgram +libamdhip64.so.6:hiprtcCreateProgram +libamdhip64.so.6:hiprtcDestroyProgram +libamdhip64.so.6:hiprtcGetCode +libamdhip64.so.6:hiprtcGetCodeSize +libamdhip64.so.6:hiprtcGetErrorString +libamdhip64.so.6:hiprtcGetLoweredName +libamdhip64.so.6:hiprtcGetProgramLog +libamdhip64.so.6:hiprtcGetProgramLogSize +libamdhip64.so.6:hiprtcVersion libavcodec.so.58:av_free_packet libavcodec.so.58:av_init_packet libavcodec.so.58:av_packet_unref @@ -891,66 +891,66 @@ libglog.so.0:_ZN6google4base21CheckOpMessageBuilder7ForVar2Ev libglog.so.0:_ZN6google4base21CheckOpMessageBuilder9NewStringB5cxx11Ev libglog.so.0:_ZN6google4base21CheckOpMessageBuilderC1EPKc libglog.so.0:_ZN6google4base21CheckOpMessageBuilderD1Ev -libhipblas.so.0:hipblasCdotc -libhipblas.so.0:hipblasCdotu -libhipblas.so.0:hipblasCgelsBatched -libhipblas.so.0:hipblasCgemm -libhipblas.so.0:hipblasCgemmStridedBatched -libhipblas.so.0:hipblasCgemv -libhipblas.so.0:hipblasCgeqrfBatched -libhipblas.so.0:hipblasCgetrfBatched -libhipblas.so.0:hipblasCgetrsBatched -libhipblas.so.0:hipblasCreate -libhipblas.so.0:hipblasCtrsm -libhipblas.so.0:hipblasCtrsmBatched -libhipblas.so.0:hipblasDdot -libhipblas.so.0:hipblasDestroy -libhipblas.so.0:hipblasDgeam -libhipblas.so.0:hipblasDgelsBatched -libhipblas.so.0:hipblasDgemm -libhipblas.so.0:hipblasDgemmStridedBatched -libhipblas.so.0:hipblasDgemv -libhipblas.so.0:hipblasDgeqrfBatched -libhipblas.so.0:hipblasDgetrfBatched -libhipblas.so.0:hipblasDgetrsBatched -libhipblas.so.0:hipblasDotEx -libhipblas.so.0:hipblasDscal -libhipblas.so.0:hipblasDtrsm -libhipblas.so.0:hipblasDtrsmBatched -libhipblas.so.0:hipblasGemmBatchedEx -libhipblas.so.0:hipblasGemmEx -libhipblas.so.0:hipblasGemmStridedBatchedEx -libhipblas.so.0:hipblasGetPointerMode -libhipblas.so.0:hipblasHgemm -libhipblas.so.0:hipblasHgemmBatched -libhipblas.so.0:hipblasHgemmStridedBatched -libhipblas.so.0:hipblasSaxpy -libhipblas.so.0:hipblasSdot -libhipblas.so.0:hipblasSetAtomicsMode 
-libhipblas.so.0:hipblasSetPointerMode -libhipblas.so.0:hipblasSetStream -libhipblas.so.0:hipblasSgeam -libhipblas.so.0:hipblasSgelsBatched -libhipblas.so.0:hipblasSgemm -libhipblas.so.0:hipblasSgemmStridedBatched -libhipblas.so.0:hipblasSgemv -libhipblas.so.0:hipblasSgeqrfBatched -libhipblas.so.0:hipblasSgetrfBatched -libhipblas.so.0:hipblasSgetrsBatched -libhipblas.so.0:hipblasSscal -libhipblas.so.0:hipblasStrsm -libhipblas.so.0:hipblasStrsmBatched -libhipblas.so.0:hipblasZdotc -libhipblas.so.0:hipblasZdotu -libhipblas.so.0:hipblasZgelsBatched -libhipblas.so.0:hipblasZgemm -libhipblas.so.0:hipblasZgemmStridedBatched -libhipblas.so.0:hipblasZgemv -libhipblas.so.0:hipblasZgeqrfBatched -libhipblas.so.0:hipblasZgetrfBatched -libhipblas.so.0:hipblasZgetrsBatched -libhipblas.so.0:hipblasZtrsm -libhipblas.so.0:hipblasZtrsmBatched +libhipblas.so.2:hipblasCdotc +libhipblas.so.2:hipblasCdotu +libhipblas.so.2:hipblasCgelsBatched +libhipblas.so.2:hipblasCgemm +libhipblas.so.2:hipblasCgemmStridedBatched +libhipblas.so.2:hipblasCgemv +libhipblas.so.2:hipblasCgeqrfBatched +libhipblas.so.2:hipblasCgetrfBatched +libhipblas.so.2:hipblasCgetrsBatched +libhipblas.so.2:hipblasCreate +libhipblas.so.2:hipblasCtrsm +libhipblas.so.2:hipblasCtrsmBatched +libhipblas.so.2:hipblasDdot +libhipblas.so.2:hipblasDestroy +libhipblas.so.2:hipblasDgeam +libhipblas.so.2:hipblasDgelsBatched +libhipblas.so.2:hipblasDgemm +libhipblas.so.2:hipblasDgemmStridedBatched +libhipblas.so.2:hipblasDgemv +libhipblas.so.2:hipblasDgeqrfBatched +libhipblas.so.2:hipblasDgetrfBatched +libhipblas.so.2:hipblasDgetrsBatched +libhipblas.so.2:hipblasDotEx +libhipblas.so.2:hipblasDscal +libhipblas.so.2:hipblasDtrsm +libhipblas.so.2:hipblasDtrsmBatched +libhipblas.so.2:hipblasGemmBatchedEx +libhipblas.so.2:hipblasGemmEx +libhipblas.so.2:hipblasGemmStridedBatchedEx +libhipblas.so.2:hipblasGetPointerMode +libhipblas.so.2:hipblasHgemm +libhipblas.so.2:hipblasHgemmBatched +libhipblas.so.2:hipblasHgemmStridedBatched 
+libhipblas.so.2:hipblasSaxpy +libhipblas.so.2:hipblasSdot +libhipblas.so.2:hipblasSetAtomicsMode +libhipblas.so.2:hipblasSetPointerMode +libhipblas.so.2:hipblasSetStream +libhipblas.so.2:hipblasSgeam +libhipblas.so.2:hipblasSgelsBatched +libhipblas.so.2:hipblasSgemm +libhipblas.so.2:hipblasSgemmStridedBatched +libhipblas.so.2:hipblasSgemv +libhipblas.so.2:hipblasSgeqrfBatched +libhipblas.so.2:hipblasSgetrfBatched +libhipblas.so.2:hipblasSgetrsBatched +libhipblas.so.2:hipblasSscal +libhipblas.so.2:hipblasStrsm +libhipblas.so.2:hipblasStrsmBatched +libhipblas.so.2:hipblasZdotc +libhipblas.so.2:hipblasZdotu +libhipblas.so.2:hipblasZgelsBatched +libhipblas.so.2:hipblasZgemm +libhipblas.so.2:hipblasZgemmStridedBatched +libhipblas.so.2:hipblasZgemv +libhipblas.so.2:hipblasZgeqrfBatched +libhipblas.so.2:hipblasZgetrfBatched +libhipblas.so.2:hipblasZgetrsBatched +libhipblas.so.2:hipblasZtrsm +libhipblas.so.2:hipblasZtrsmBatched libhipfft.so.0:hipfftCreate libhipfft.so.0:hipfftExecC2C libhipfft.so.0:hipfftExecC2R @@ -1093,99 +1093,94 @@ libhipsolver.so.0:hipsolverDnZungqr libhipsolver.so.0:hipsolverDnZungqr_bufferSize libhipsolver.so.0:hipsolverDnZunmqr libhipsolver.so.0:hipsolverDnZunmqr_bufferSize -libhipsparse.so.0:hipsparseCbsrmm -libhipsparse.so.0:hipsparseCbsrmv -libhipsparse.so.0:hipsparseCbsrsm2_analysis -libhipsparse.so.0:hipsparseCbsrsm2_bufferSize -libhipsparse.so.0:hipsparseCbsrsm2_solve -libhipsparse.so.0:hipsparseCbsrsv2_analysis -libhipsparse.so.0:hipsparseCbsrsv2_bufferSize -libhipsparse.so.0:hipsparseCbsrsv2_solve -libhipsparse.so.0:hipsparseCcsrgeam2 -libhipsparse.so.0:hipsparseCcsrgeam2_bufferSizeExt -libhipsparse.so.0:hipsparseCreate -libhipsparse.so.0:hipsparseCreateBsrsm2Info -libhipsparse.so.0:hipsparseCreateBsrsv2Info -libhipsparse.so.0:hipsparseCreateCoo -libhipsparse.so.0:hipsparseCreateCsr -libhipsparse.so.0:hipsparseCreateCsrgemm2Info -libhipsparse.so.0:hipsparseCreateDnMat -libhipsparse.so.0:hipsparseCreateDnVec 
-libhipsparse.so.0:hipsparseCreateIdentityPermutation -libhipsparse.so.0:hipsparseCreateMatDescr -libhipsparse.so.0:hipsparseCsrSetPointers -libhipsparse.so.0:hipsparseCsrSetStridedBatch -libhipsparse.so.0:hipsparseDbsrmm -libhipsparse.so.0:hipsparseDbsrmv -libhipsparse.so.0:hipsparseDbsrsm2_analysis -libhipsparse.so.0:hipsparseDbsrsm2_bufferSize -libhipsparse.so.0:hipsparseDbsrsm2_solve -libhipsparse.so.0:hipsparseDbsrsv2_analysis -libhipsparse.so.0:hipsparseDbsrsv2_bufferSize -libhipsparse.so.0:hipsparseDbsrsv2_solve -libhipsparse.so.0:hipsparseDcsrgeam2 -libhipsparse.so.0:hipsparseDcsrgeam2_bufferSizeExt -libhipsparse.so.0:hipsparseDcsrgemm2 -libhipsparse.so.0:hipsparseDcsrgemm2_bufferSizeExt -libhipsparse.so.0:hipsparseDestroy -libhipsparse.so.0:hipsparseDestroyBsrsm2Info -libhipsparse.so.0:hipsparseDestroyBsrsv2Info -libhipsparse.so.0:hipsparseDestroyCsrgemm2Info -libhipsparse.so.0:hipsparseDestroyDnMat -libhipsparse.so.0:hipsparseDestroyDnVec -libhipsparse.so.0:hipsparseDestroyMatDescr -libhipsparse.so.0:hipsparseDestroySpMat -libhipsparse.so.0:hipsparseDnMatSetStridedBatch -libhipsparse.so.0:hipsparseSDDMM -libhipsparse.so.0:hipsparseSDDMM_bufferSize -libhipsparse.so.0:hipsparseSDDMM_preprocess -libhipsparse.so.0:hipsparseSbsrmm -libhipsparse.so.0:hipsparseSbsrmv -libhipsparse.so.0:hipsparseSbsrsm2_analysis -libhipsparse.so.0:hipsparseSbsrsm2_bufferSize -libhipsparse.so.0:hipsparseSbsrsm2_solve -libhipsparse.so.0:hipsparseSbsrsv2_analysis -libhipsparse.so.0:hipsparseSbsrsv2_bufferSize -libhipsparse.so.0:hipsparseSbsrsv2_solve -libhipsparse.so.0:hipsparseScsrgeam2 -libhipsparse.so.0:hipsparseScsrgeam2_bufferSizeExt -libhipsparse.so.0:hipsparseScsrgemm2 -libhipsparse.so.0:hipsparseScsrgemm2_bufferSizeExt -libhipsparse.so.0:hipsparseSetMatDiagType -libhipsparse.so.0:hipsparseSetMatFillMode -libhipsparse.so.0:hipsparseSetMatIndexBase -libhipsparse.so.0:hipsparseSetMatType -libhipsparse.so.0:hipsparseSetPointerMode -libhipsparse.so.0:hipsparseSetStream 
-libhipsparse.so.0:hipsparseSpGEMM_compute -libhipsparse.so.0:hipsparseSpGEMM_copy -libhipsparse.so.0:hipsparseSpGEMM_createDescr -libhipsparse.so.0:hipsparseSpGEMM_destroyDescr -libhipsparse.so.0:hipsparseSpGEMM_workEstimation -libhipsparse.so.0:hipsparseSpMM -libhipsparse.so.0:hipsparseSpMM_bufferSize -libhipsparse.so.0:hipsparseSpMV -libhipsparse.so.0:hipsparseSpMV_bufferSize -libhipsparse.so.0:hipsparseSpMatGetSize -libhipsparse.so.0:hipsparseXbsrsm2_zeroPivot -libhipsparse.so.0:hipsparseXbsrsv2_zeroPivot -libhipsparse.so.0:hipsparseXcoo2csr -libhipsparse.so.0:hipsparseXcoosortByRow -libhipsparse.so.0:hipsparseXcoosort_bufferSizeExt -libhipsparse.so.0:hipsparseXcsrgeam2Nnz -libhipsparse.so.0:hipsparseXcsrgemm2Nnz -libhipsparse.so.0:hipsparseXcsrsort -libhipsparse.so.0:hipsparseXcsrsort_bufferSizeExt -libhipsparse.so.0:hipsparseZbsrmm -libhipsparse.so.0:hipsparseZbsrmv -libhipsparse.so.0:hipsparseZbsrsm2_analysis -libhipsparse.so.0:hipsparseZbsrsm2_bufferSize -libhipsparse.so.0:hipsparseZbsrsm2_solve -libhipsparse.so.0:hipsparseZbsrsv2_analysis -libhipsparse.so.0:hipsparseZbsrsv2_bufferSize -libhipsparse.so.0:hipsparseZbsrsv2_solve -libhipsparse.so.0:hipsparseZcsrgeam2 -libhipsparse.so.0:hipsparseZcsrgeam2_bufferSizeExt +libhipsparse.so.1:hipsparseCbsrmm +libhipsparse.so.1:hipsparseCbsrmv +libhipsparse.so.1:hipsparseCbsrsm2_analysis +libhipsparse.so.1:hipsparseCbsrsm2_bufferSize +libhipsparse.so.1:hipsparseCbsrsm2_solve +libhipsparse.so.1:hipsparseCbsrsv2_analysis +libhipsparse.so.1:hipsparseCbsrsv2_bufferSize +libhipsparse.so.1:hipsparseCbsrsv2_solve +libhipsparse.so.1:hipsparseCcsrgeam2 +libhipsparse.so.1:hipsparseCcsrgeam2_bufferSizeExt +libhipsparse.so.1:hipsparseCreate +libhipsparse.so.1:hipsparseCreateBsrsm2Info +libhipsparse.so.1:hipsparseCreateBsrsv2Info +libhipsparse.so.1:hipsparseCreateCoo +libhipsparse.so.1:hipsparseCreateCsr +libhipsparse.so.1:hipsparseCreateCsrgemm2Info +libhipsparse.so.1:hipsparseCreateDnMat +libhipsparse.so.1:hipsparseCreateDnVec 
+libhipsparse.so.1:hipsparseCreateIdentityPermutation +libhipsparse.so.1:hipsparseCreateMatDescr +libhipsparse.so.1:hipsparseCsrSetPointers +libhipsparse.so.1:hipsparseDbsrmm +libhipsparse.so.1:hipsparseDbsrmv +libhipsparse.so.1:hipsparseDbsrsm2_analysis +libhipsparse.so.1:hipsparseDbsrsm2_bufferSize +libhipsparse.so.1:hipsparseDbsrsm2_solve +libhipsparse.so.1:hipsparseDbsrsv2_analysis +libhipsparse.so.1:hipsparseDbsrsv2_bufferSize +libhipsparse.so.1:hipsparseDbsrsv2_solve +libhipsparse.so.1:hipsparseDcsrgeam2 +libhipsparse.so.1:hipsparseDcsrgeam2_bufferSizeExt +libhipsparse.so.1:hipsparseDcsrgemm2 +libhipsparse.so.1:hipsparseDcsrgemm2_bufferSizeExt +libhipsparse.so.1:hipsparseDestroy +libhipsparse.so.1:hipsparseDestroyBsrsm2Info +libhipsparse.so.1:hipsparseDestroyBsrsv2Info +libhipsparse.so.1:hipsparseDestroyCsrgemm2Info +libhipsparse.so.1:hipsparseDestroyDnMat +libhipsparse.so.1:hipsparseDestroyDnVec +libhipsparse.so.1:hipsparseDestroyMatDescr +libhipsparse.so.1:hipsparseDestroySpMat +libhipsparse.so.1:hipsparseSbsrmm +libhipsparse.so.1:hipsparseSbsrmv +libhipsparse.so.1:hipsparseSbsrsm2_analysis +libhipsparse.so.1:hipsparseSbsrsm2_bufferSize +libhipsparse.so.1:hipsparseSbsrsm2_solve +libhipsparse.so.1:hipsparseSbsrsv2_analysis +libhipsparse.so.1:hipsparseSbsrsv2_bufferSize +libhipsparse.so.1:hipsparseSbsrsv2_solve +libhipsparse.so.1:hipsparseScsrgeam2 +libhipsparse.so.1:hipsparseScsrgeam2_bufferSizeExt +libhipsparse.so.1:hipsparseScsrgemm2 +libhipsparse.so.1:hipsparseScsrgemm2_bufferSizeExt +libhipsparse.so.1:hipsparseSetMatDiagType +libhipsparse.so.1:hipsparseSetMatFillMode +libhipsparse.so.1:hipsparseSetMatIndexBase +libhipsparse.so.1:hipsparseSetMatType +libhipsparse.so.1:hipsparseSetPointerMode +libhipsparse.so.1:hipsparseSetStream +libhipsparse.so.1:hipsparseSpGEMM_compute +libhipsparse.so.1:hipsparseSpGEMM_copy +libhipsparse.so.1:hipsparseSpGEMM_createDescr +libhipsparse.so.1:hipsparseSpGEMM_destroyDescr +libhipsparse.so.1:hipsparseSpGEMM_workEstimation 
+libhipsparse.so.1:hipsparseSpMM +libhipsparse.so.1:hipsparseSpMM_bufferSize +libhipsparse.so.1:hipsparseSpMV +libhipsparse.so.1:hipsparseSpMV_bufferSize +libhipsparse.so.1:hipsparseSpMatGetSize +libhipsparse.so.1:hipsparseXbsrsm2_zeroPivot +libhipsparse.so.1:hipsparseXbsrsv2_zeroPivot +libhipsparse.so.1:hipsparseXcoo2csr +libhipsparse.so.1:hipsparseXcoosortByRow +libhipsparse.so.1:hipsparseXcoosort_bufferSizeExt +libhipsparse.so.1:hipsparseXcsrgeam2Nnz +libhipsparse.so.1:hipsparseXcsrgemm2Nnz +libhipsparse.so.1:hipsparseXcsrsort +libhipsparse.so.1:hipsparseXcsrsort_bufferSizeExt +libhipsparse.so.1:hipsparseZbsrmm +libhipsparse.so.1:hipsparseZbsrmv +libhipsparse.so.1:hipsparseZbsrsm2_analysis +libhipsparse.so.1:hipsparseZbsrsm2_bufferSize +libhipsparse.so.1:hipsparseZbsrsm2_solve +libhipsparse.so.1:hipsparseZbsrsv2_analysis +libhipsparse.so.1:hipsparseZbsrsv2_bufferSize +libhipsparse.so.1:hipsparseZbsrsv2_solve +libhipsparse.so.1:hipsparseZcsrgeam2 +libhipsparse.so.1:hipsparseZcsrgeam2_bufferSizeExt libhiredis.so.1.0.0:redisCommand libhiredis.so.1.0.0:redisCommandArgv libhiredis.so.1.0.0:redisConnectWithTimeout @@ -1406,88 +1401,6 @@ libm.so.6:tanhf libm.so.6:tgamma libm.so.6:trunc libm.so.6:truncf -libmagma.so.2.7:magma_cgeev -libmagma.so.2.7:magma_cgels_gpu -libmagma.so.2.7:magma_cgeqrf2_gpu -libmagma.so.2.7:magma_cgeqrf_gpu -libmagma.so.2.7:magma_cgesdd -libmagma.so.2.7:magma_cgetrf_batched -libmagma.so.2.7:magma_cgetrf_gpu -libmagma.so.2.7:magma_cgetrf_nopiv_batched -libmagma.so.2.7:magma_cgetrf_nopiv_gpu -libmagma.so.2.7:magma_cgetrs_batched -libmagma.so.2.7:magma_cgetrs_gpu -libmagma.so.2.7:magma_cheevd_gpu -libmagma.so.2.7:magma_chetrf_gpu -libmagma.so.2.7:magma_cpotrf_batched -libmagma.so.2.7:magma_cpotrf_gpu -libmagma.so.2.7:magma_cpotrs_batched -libmagma.so.2.7:magma_cpotrs_gpu -libmagma.so.2.7:magma_dgeev -libmagma.so.2.7:magma_dgels_gpu -libmagma.so.2.7:magma_dgeqrf2_gpu -libmagma.so.2.7:magma_dgeqrf_gpu -libmagma.so.2.7:magma_dgesdd 
-libmagma.so.2.7:magma_dgetrf_batched -libmagma.so.2.7:magma_dgetrf_gpu -libmagma.so.2.7:magma_dgetrf_nopiv_batched -libmagma.so.2.7:magma_dgetrf_nopiv_gpu -libmagma.so.2.7:magma_dgetrs_batched -libmagma.so.2.7:magma_dgetrs_gpu -libmagma.so.2.7:magma_dpotrf_batched -libmagma.so.2.7:magma_dpotrf_gpu -libmagma.so.2.7:magma_dpotrs_batched -libmagma.so.2.7:magma_dpotrs_gpu -libmagma.so.2.7:magma_dsyevd_gpu -libmagma.so.2.7:magma_dsytrf_gpu -libmagma.so.2.7:magma_get_cgeqrf_nb -libmagma.so.2.7:magma_get_dgeqrf_nb -libmagma.so.2.7:magma_get_sgeqrf_nb -libmagma.so.2.7:magma_get_zgeqrf_nb -libmagma.so.2.7:magma_init -libmagma.so.2.7:magma_queue_create_from_hip_internal -libmagma.so.2.7:magma_queue_destroy_internal -libmagma.so.2.7:magma_queue_sync_internal -libmagma.so.2.7:magma_sgeev -libmagma.so.2.7:magma_sgels_gpu -libmagma.so.2.7:magma_sgeqrf2_gpu -libmagma.so.2.7:magma_sgeqrf_gpu -libmagma.so.2.7:magma_sgesdd -libmagma.so.2.7:magma_sgetrf_batched -libmagma.so.2.7:magma_sgetrf_gpu -libmagma.so.2.7:magma_sgetrf_nopiv_batched -libmagma.so.2.7:magma_sgetrf_nopiv_gpu -libmagma.so.2.7:magma_sgetrs_batched -libmagma.so.2.7:magma_sgetrs_gpu -libmagma.so.2.7:magma_spotrf_batched -libmagma.so.2.7:magma_spotrf_gpu -libmagma.so.2.7:magma_spotrs_batched -libmagma.so.2.7:magma_spotrs_gpu -libmagma.so.2.7:magma_ssyevd_gpu -libmagma.so.2.7:magma_ssytrf_gpu -libmagma.so.2.7:magma_strerror -libmagma.so.2.7:magma_version -libmagma.so.2.7:magma_zgeev -libmagma.so.2.7:magma_zgels_gpu -libmagma.so.2.7:magma_zgeqrf2_gpu -libmagma.so.2.7:magma_zgeqrf_gpu -libmagma.so.2.7:magma_zgesdd -libmagma.so.2.7:magma_zgetrf_batched -libmagma.so.2.7:magma_zgetrf_gpu -libmagma.so.2.7:magma_zgetrf_nopiv_batched -libmagma.so.2.7:magma_zgetrf_nopiv_gpu -libmagma.so.2.7:magma_zgetrs_batched -libmagma.so.2.7:magma_zgetrs_gpu -libmagma.so.2.7:magma_zheevd_gpu -libmagma.so.2.7:magma_zhetrf_gpu -libmagma.so.2.7:magma_zpotrf_batched -libmagma.so.2.7:magma_zpotrf_gpu -libmagma.so.2.7:magma_zpotrs_batched 
-libmagma.so.2.7:magma_zpotrs_gpu -libmagma.so.2.7:magmablas_ctrsm_batched -libmagma.so.2.7:magmablas_dtrsm_batched -libmagma.so.2.7:magmablas_strsm_batched -libmagma.so.2.7:magmablas_ztrsm_batched libmpi.so.40:MPIX_Query_cuda_support libmpi.so.40:ompi_mpi_byte libmpi.so.40:ompi_mpi_char @@ -1832,6 +1745,7 @@ libstdc++.so.6:_ZNSt13basic_filebufIcSt11char_traitsIcEE8overflowEi libstdc++.so.6:_ZNSt13basic_filebufIcSt11char_traitsIcEE9pbackfailEi libstdc++.so.6:_ZNSt13basic_filebufIcSt11char_traitsIcEE9showmanycEv libstdc++.so.6:_ZNSt13basic_filebufIcSt11char_traitsIcEE9underflowEv +libstdc++.so.6:_ZNSt13basic_filebufIcSt11char_traitsIcEEC1Ev libstdc++.so.6:_ZNSt13basic_filebufIcSt11char_traitsIcEEC2Ev libstdc++.so.6:_ZNSt13basic_filebufIcSt11char_traitsIcEED2Ev libstdc++.so.6:_ZNSt13basic_fstreamIcSt11char_traitsIcEEC1ERKNSt7__cxx1112basic_stringIcS1_SaIcEEESt13_Ios_Openmode @@ -1885,6 +1799,7 @@ libstdc++.so.6:_ZNSt18condition_variableC1Ev libstdc++.so.6:_ZNSt18condition_variableD1Ev libstdc++.so.6:_ZNSt28__atomic_futex_unsigned_base19_M_futex_notify_allEPj libstdc++.so.6:_ZNSt28__atomic_futex_unsigned_base19_M_futex_wait_untilEPjjbNSt6chrono8durationIlSt5ratioILl1ELl1EEEENS2_IlS3_ILl1ELl1000000000EEEE +libstdc++.so.6:_ZNSt28__atomic_futex_unsigned_base26_M_futex_wait_until_steadyEPjjbNSt6chrono8durationIlSt5ratioILl1ELl1EEEENS2_IlS3_ILl1ELl1000000000EEEE libstdc++.so.6:_ZNSt3_V215system_categoryEv libstdc++.so.6:_ZNSt3_V216generic_categoryEv libstdc++.so.6:_ZNSt5ctypeIcE2idE @@ -2036,6 +1951,8 @@ libstdc++.so.6:_ZTVN10__cxxabiv120__function_type_infoE libstdc++.so.6:_ZTVN10__cxxabiv120__si_class_type_infoE libstdc++.so.6:_ZTVN10__cxxabiv121__vmi_class_type_infoE libstdc++.so.6:_ZTVNSt7__cxx1115basic_stringbufIcSt11char_traitsIcESaIcEEE +libstdc++.so.6:_ZTVNSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEEE +libstdc++.so.6:_ZTVNSt7__cxx1119basic_ostringstreamIcSt11char_traitsIcESaIcEEE libstdc++.so.6:_ZTVSi libstdc++.so.6:_ZTVSo 
libstdc++.so.6:_ZTVSt11regex_error @@ -2043,6 +1960,7 @@ libstdc++.so.6:_ZTVSt12bad_weak_ptr libstdc++.so.6:_ZTVSt12future_error libstdc++.so.6:_ZTVSt12system_error libstdc++.so.6:_ZTVSt13basic_filebufIcSt11char_traitsIcEE +libstdc++.so.6:_ZTVSt14basic_ifstreamIcSt11char_traitsIcEE libstdc++.so.6:_ZTVSt15basic_streambufIcSt11char_traitsIcEE libstdc++.so.6:_ZTVSt9bad_alloc libstdc++.so.6:_ZTVSt9basic_iosIcSt11char_traitsIcEE diff --git a/packages/py/pytorch/files/enable-hipsparse-descriptors.patch b/packages/py/pytorch/files/enable-hipsparse-descriptors.patch new file mode 100644 index 000000000000..1c2acac57ed1 --- /dev/null +++ b/packages/py/pytorch/files/enable-hipsparse-descriptors.patch @@ -0,0 +1,116 @@ +From a94b6f39d1040ee5a9a1abfc675024dce386fee9 Mon Sep 17 00:00:00 2001 +From: Jeff Daily +Date: Thu, 5 Oct 2023 16:07:51 +0000 +Subject: [PATCH] [ROCm] conditionally enable hipsparse const descriptors for + version >= 2.4.0 (#110317) + +This is in preparation for upcoming backwards-incompatible hipsparse changes. 
+ +Pull Request resolved: https://github.com/pytorch/pytorch/pull/110317 +Approved by: https://github.com/malfet +--- + aten/src/ATen/cuda/CUDASparse.h | 25 ++++++++++++++++++++-- + aten/src/ATen/cuda/CUDASparseDescriptors.h | 19 ++++++++-------- + 2 files changed, 32 insertions(+), 12 deletions(-) + +diff --git a/aten/src/ATen/cuda/CUDASparse.h b/aten/src/ATen/cuda/CUDASparse.h +index c8fea7fe440060a..0d4520938291ca9 100644 +--- a/aten/src/ATen/cuda/CUDASparse.h ++++ b/aten/src/ATen/cuda/CUDASparse.h +@@ -1,6 +1,10 @@ + #pragma once + + #include ++#if defined(USE_ROCM) ++#include ++#define HIPSPARSE_VERSION ((hipsparseVersionMajor*100000) + (hipsparseVersionMinor*100) + hipsparseVersionPatch) ++#endif + + // cuSparse Generic API added in CUDA 10.1 + // Windows support added in CUDA 11.0 +@@ -25,20 +29,37 @@ + #define AT_USE_CUSPARSE_CONST_DESCRIPTORS() 0 + #endif + ++#if defined(USE_ROCM) ++ ++// hipSparse const API added in v2.4.0 ++#if HIPSPARSE_VERSION >= 200400 ++#define AT_USE_HIPSPARSE_CONST_DESCRIPTORS() 1 ++#define AT_USE_HIPSPARSE_GENERIC_52_API() 0 ++#define AT_USE_HIPSPARSE_GENERIC_API() 1 ++#else ++#define AT_USE_HIPSPARSE_CONST_DESCRIPTORS() 0 ++ + // hipSparse Generic API ROCm 5.2 +-#if defined(USE_ROCM) && ROCM_VERSION >= 50200 ++#if ROCM_VERSION >= 50200 + #define AT_USE_HIPSPARSE_GENERIC_52_API() 1 + #else + #define AT_USE_HIPSPARSE_GENERIC_52_API() 0 + #endif + + // hipSparse Generic API ROCm 5.1 +-#if defined(USE_ROCM) && ROCM_VERSION >= 50100 ++#if ROCM_VERSION >= 50100 + #define AT_USE_HIPSPARSE_GENERIC_API() 1 + #else + #define AT_USE_HIPSPARSE_GENERIC_API() 0 + #endif + ++#endif // HIPSPARSE_VERSION >= 200400 ++#else // USE_ROCM ++#define AT_USE_HIPSPARSE_CONST_DESCRIPTORS() 0 ++#define AT_USE_HIPSPARSE_GENERIC_52_API() 0 ++#define AT_USE_HIPSPARSE_GENERIC_API() 0 ++#endif // USE_ROCM ++ + // cuSparse Generic API spsv function was added in CUDA 11.3.0 + #if defined(CUDART_VERSION) && defined(CUSPARSE_VERSION) && (CUSPARSE_VERSION >= 11500) + 
#define AT_USE_CUSPARSE_GENERIC_SPSV() 1 +diff --git a/aten/src/ATen/cuda/CUDASparseDescriptors.h b/aten/src/ATen/cuda/CUDASparseDescriptors.h +index 7ab95e257f739ce..03958b1d404b9f6 100644 +--- a/aten/src/ATen/cuda/CUDASparseDescriptors.h ++++ b/aten/src/ATen/cuda/CUDASparseDescriptors.h +@@ -35,7 +35,7 @@ class CuSparseDescriptor { + std::unique_ptr> descriptor_; + }; + +-#if AT_USE_CUSPARSE_CONST_DESCRIPTORS() ++#if AT_USE_CUSPARSE_CONST_DESCRIPTORS() || AT_USE_HIPSPARSE_CONST_DESCRIPTORS() + template + struct ConstCuSparseDescriptorDeleter { + void operator()(T* x) { +@@ -58,16 +58,15 @@ class ConstCuSparseDescriptor { + protected: + std::unique_ptr> descriptor_; + }; +-#endif // AT_USE_CUSPARSE_CONST_DESCRIPTORS ++#endif // AT_USE_CUSPARSE_CONST_DESCRIPTORS || AT_USE_HIPSPARSE_CONST_DESCRIPTORS + + #if defined(USE_ROCM) +-// hipSPARSE doesn't define this +-using cusparseMatDescr = std::remove_pointer::type; +-using cusparseDnMatDescr = std::remove_pointer::type; +-using cusparseDnVecDescr = std::remove_pointer::type; +-using cusparseSpMatDescr = std::remove_pointer::type; +-using cusparseSpMatDescr = std::remove_pointer::type; +-using cusparseSpGEMMDescr = std::remove_pointer::type; ++using cusparseMatDescr = std::remove_pointer::type; ++using cusparseDnMatDescr = std::remove_pointer::type; ++using cusparseDnVecDescr = std::remove_pointer::type; ++using cusparseSpMatDescr = std::remove_pointer::type; ++using cusparseSpMatDescr = std::remove_pointer::type; ++using cusparseSpGEMMDescr = std::remove_pointer::type; + #if AT_USE_HIPSPARSE_TRIANGULAR_SOLVE() + using bsrsv2Info = std::remove_pointer::type; + using bsrsm2Info = std::remove_pointer::type; +@@ -143,7 +142,7 @@ class TORCH_CUDA_CPP_API CuSparseSpMatDescriptor + + //AT_USE_HIPSPARSE_GENERIC_52_API() || (AT_USE_CUSPARSE_GENERIC_API() && AT_USE_CUSPARSE_NON_CONST_DESCRIPTORS()) + +-#elif AT_USE_CUSPARSE_CONST_DESCRIPTORS() ++#elif AT_USE_CUSPARSE_CONST_DESCRIPTORS() || AT_USE_HIPSPARSE_CONST_DESCRIPTORS() + 
class TORCH_CUDA_CPP_API CuSparseDnMatDescriptor + : public ConstCuSparseDescriptor< + cusparseDnMatDescr, diff --git a/packages/py/pytorch/files/fully-disable-rocm-kernel-asserts.patch b/packages/py/pytorch/files/fully-disable-rocm-kernel-asserts.patch new file mode 100644 index 000000000000..032991dd7b4e --- /dev/null +++ b/packages/py/pytorch/files/fully-disable-rocm-kernel-asserts.patch @@ -0,0 +1,303 @@ +From 66a76516bfc341b2b55bb2056d2faa9c2de46d69 Mon Sep 17 00:00:00 2001 +From: hongxyan +Date: Wed, 13 Dec 2023 15:44:53 +0000 +Subject: [PATCH] [ROCm] Disabling Kernel Asserts for ROCm by default - fix and + clean up and refactoring (#114660) + +Related to #103973 #110532 #108404 #94891 + +**Context:** +As commented in https://github.com/pytorch/pytorch/blob/6ae0554d11b973930d7b8ec1e937b27ac961d7bf/cmake/Dependencies.cmake#L1198 +Kernel asserts are enabled by default for CUDA and disabled for ROCm. +However it is somewhat broken, and Kernel assert was still enabled for ROCm. + +Disabling kernel assert is also needed for users who do not have PCIe atomics support. These community users have verified that disabling the kernel assert in PyTorch/ROCm platform fixed their pytorch workflow, like torch.sum script, stable-diffusion. (see the related issues) + +**Changes:** + +This pull request serves the following purposes: +* Refactor and clean up the logic, make it simpler for ROCm to enable and disable Kernel Asserts +* Fix the bug that Kernel Asserts for ROCm was not disabled by default. + +Specifically, +- Renamed `TORCH_DISABLE_GPU_ASSERTS` to `C10_USE_ROCM_KERNEL_ASSERT` for the following reasons: +(1) This variable only applies to ROCm. +(2) The new name is more align with #define CUDA_KERNEL_ASSERT function. +(3) With USE_ in front of the name, we can easily control it with environment variable to turn on and off this feature during build (e.g. `USE_ROCM_KERNEL_ASSERT=1 python setup.py develop` will enable kernel assert for ROCm build). 
+- Get rid of the `ROCM_FORCE_ENABLE_GPU_ASSERTS` to simplify the logic and make it easier to understand and maintain +- Added `#cmakedefine` to carry over the CMake variable to C++ + +**Tests:** +(1) build with default mode and verify that USE_ROCM_KERNEL_ASSERT is OFF(0), and kernel assert is disabled: + +``` +python setup.py develop +``` +Verify CMakeCache.txt has correct value. +``` +/xxxx/pytorch/build$ grep USE_ROCM_KERNEL_ASSERT CMakeCache.txt +USE_ROCM_KERNEL_ASSERT:BOOL=0 +``` +Tested the following code in a ROCm build and a CUDA build, expecting different return codes. + +``` +subprocess.call([sys.executable, '-c', "import torch;torch._assert_async(torch.tensor(0,device='cuda'));torch.cuda.synchronize()"]) +``` +This piece of code is adapted from the unit test below to get around the limitation that this unit test is currently skipped for ROCm. (We will check to enable this unit test in the future) + +``` +python test/test_cuda_expandable_segments.py -k test_fixed_cuda_assert_async +``` + +Ran the following script, expecting r == 0 since the CUDA_KERNEL_ASSERT is defined as nothing: +``` +>> import sys +>>> import subprocess +>>> r=subprocess.call([sys.executable, '-c', "import torch;torch._assert_async(torch.tensor(0,device='cuda'));torch.cuda.synchronize()"]) +>>> r +0 +``` + +(2) Enable the kernel assert by building with USE_ROCM_KERNEL_ASSERT=1, or USE_ROCM_KERNEL_ASSERT=ON +``` +USE_ROCM_KERNEL_ASSERT=1 python setup.py develop +``` + +Verify `USE_ROCM_KERNEL_ASSERT` is `1` +``` +/xxxx/pytorch/build$ grep USE_ROCM_KERNEL_ASSERT CMakeCache.txt +USE_ROCM_KERNEL_ASSERT:BOOL=1 +``` + +Run the assert test and expect a return code not equal to 0. 
+ +``` +>> import sys +>>> import subprocess +>>> r=subprocess.call([sys.executable, '-c', "import torch;torch._assert_async(torch.tensor(0,device='cuda'));torch.cuda.synchronize()"]) +>>>/xxxx/pytorch/aten/src/ATen/native/hip/TensorCompare.hip:108: _assert_async_cuda_kernel: Device-side assertion `input[0] != 0' failed. +:0:rocdevice.cpp :2690: 2435301199202 us: [pid:206019 tid:0x7f6cf0a77700] Callback: Queue 0x7f64e8400000 aborting with error : HSA_STATUS_ERROR_EXCEPTION: An HSAIL operation resulted in a hardware exception. code: 0x1016 + +>>> r +-6 +``` + +Pull Request resolved: https://github.com/pytorch/pytorch/pull/114660 +Approved by: https://github.com/jeffdaily, https://github.com/malfet, https://github.com/jithunnair-amd +--- + CMakeLists.txt | 2 +- + build.bzl | 2 +- + c10/CMakeLists.txt | 1 + + c10/macros/Macros.h | 19 +++++++------------ + c10/macros/cmake_macros.h.in | 1 + + c10/ovrsource_defs.bzl | 1 + + caffe2/core/macros.h.in | 4 ++-- + cmake/Dependencies.cmake | 20 ++++++++++---------- + cmake/Summary.cmake | 2 +- + setup.py | 3 +++ + 10 files changed, 28 insertions(+), 27 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index c243652416ec9b8..9194e520bb002db 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -288,7 +288,7 @@ option(USE_VULKAN_RELAXED_PRECISION "Vulkan - Use relaxed precision math in the + option(USE_XNNPACK "Use XNNPACK" ON) + option(USE_ZMQ "Use ZMQ" OFF) + option(USE_ZSTD "Use ZSTD" OFF) +-option(TORCH_DISABLE_GPU_ASSERTS "Disable GPU asserts by default" OFF) ++option(USE_ROCM_KERNEL_ASSERT "Use Kernel Assert for ROCm" OFF) + # Ensure that an ITT build is the default for x86 CPUs + cmake_dependent_option( + USE_ITT "Use Intel(R) VTune Profiler ITT functionality" ON +diff --git a/build.bzl b/build.bzl +index deb01aab23c483e..6490a7f3839eb80 100644 +--- a/build.bzl ++++ b/build.bzl +@@ -24,7 +24,7 @@ def define_targets(rules): + "CAFFE2_USE_CUDNN", + "USE_MKLDNN", + "CAFFE2_USE_ITT", +- 
"TORCH_DISABLE_GPU_ASSERTS", ++ "USE_ROCM_KERNEL_ASSERT", + "EIGEN_MPL2_ONLY", + ], + ) +diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt +index 2d5fbf555c1dc29..68396a654d299c5 100644 +--- a/c10/CMakeLists.txt ++++ b/c10/CMakeLists.txt +@@ -18,6 +18,7 @@ set(C10_USE_GLOG ${USE_GLOG}) # used in cmake_macros.h.in + set(C10_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in + set(C10_USE_NUMA ${USE_NUMA}) + set(C10_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME}) ++set(C10_USE_ROCM_KERNEL_ASSERT ${USE_ROCM_KERNEL_ASSERT}) + configure_file( + ${CMAKE_CURRENT_LIST_DIR}/macros/cmake_macros.h.in + ${CMAKE_BINARY_DIR}/c10/macros/cmake_macros.h) +diff --git a/c10/macros/Macros.h b/c10/macros/Macros.h +index 55171fa1a44cc74..563625b296d2ac1 100644 +--- a/c10/macros/Macros.h ++++ b/c10/macros/Macros.h +@@ -374,9 +374,7 @@ extern SYCL_EXTERNAL void __assert_fail( + unsigned int line, + const char* func); + #else // __SYCL_DEVICE_ONLY__ +-#if ( \ +- defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__)) && \ +- !defined(TORCH_DISABLE_GPU_ASSERTS)) ++#if (defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__))) + // CUDA supports __assert_fail function which are common for both device + // and host side code. + __host__ __device__ +@@ -393,18 +391,14 @@ __host__ __device__ + unsigned int line, + const char* function) noexcept __attribute__((__noreturn__)); + +-#if (defined(__HIP_ARCH__) || defined(__HIP__)) && \ +- !defined(TORCH_DISABLE_GPU_ASSERTS) +-// ROCm supports __assert_fail only as a device side function. 
+-__device__ __attribute__((noinline)) __attribute__((weak)) void __assert_fail( +- const char* assertion, +- const char* file, +- unsigned int line, +- const char* function); +-#endif // defined(__HIP_ARCH__) || defined(__HIP__) + #endif // __SYCL_DEVICE_ONLY__ + } + #endif // NDEBUG ++// ROCm disable kernel assert by default ++#if !defined(C10_USE_ROCM_KERNEL_ASSERT) and defined(USE_ROCM) ++#define CUDA_KERNEL_ASSERT(cond) ++#define SYCL_KERNEL_ASSERT(cond) ++#else + #define CUDA_KERNEL_ASSERT(cond) \ + if (C10_UNLIKELY(!(cond))) { \ + __assert_fail( \ +@@ -415,6 +409,7 @@ __device__ __attribute__((noinline)) __attribute__((weak)) void __assert_fail( + __assert_fail( \ + #cond, __FILE__, static_cast(__LINE__), __func__); \ + } ++#endif // C10_USE_ROCM_KERNEL_ASSERT and USE_ROCM + #endif // __APPLE__ + + #ifdef __APPLE__ +diff --git a/c10/macros/cmake_macros.h.in b/c10/macros/cmake_macros.h.in +index a1e4fd8ce1b495d..76c185b55236c14 100644 +--- a/c10/macros/cmake_macros.h.in ++++ b/c10/macros/cmake_macros.h.in +@@ -9,5 +9,6 @@ + #cmakedefine C10_USE_GFLAGS + #cmakedefine C10_USE_NUMA + #cmakedefine C10_USE_MSVC_STATIC_RUNTIME ++#cmakedefine C10_USE_ROCM_KERNEL_ASSERT + + #endif // C10_MACROS_CMAKE_MACROS_H_ +diff --git a/c10/ovrsource_defs.bzl b/c10/ovrsource_defs.bzl +index 391f3cbf698ff2b..0ca1f728631b502 100644 +--- a/c10/ovrsource_defs.bzl ++++ b/c10/ovrsource_defs.bzl +@@ -104,6 +104,7 @@ def define_ovrsource_targets(): + ("#cmakedefine C10_BUILD_SHARED_LIBS", ""), + ("#cmakedefine C10_USE_NUMA", ""), + ("#cmakedefine C10_USE_MSVC_STATIC_RUNTIME", ""), ++ ("#cmakedefine C10_USE_ROCM_KERNEL_ASSERT", ""), + ] + + mobile_c10_cmake_defines = [ +diff --git a/caffe2/core/macros.h.in b/caffe2/core/macros.h.in +index 997752ede308cdd..4a2fe0c946bee75 100644 +--- a/caffe2/core/macros.h.in ++++ b/caffe2/core/macros.h.in +@@ -26,13 +26,13 @@ + #cmakedefine CAFFE2_USE_NVTX + #cmakedefine CAFFE2_USE_ITT + #cmakedefine CAFFE2_USE_TRT +-#cmakedefine TORCH_DISABLE_GPU_ASSERTS 
+ + #ifndef EIGEN_MPL2_ONLY + #cmakedefine EIGEN_MPL2_ONLY + #endif + + // Useful build settings that are recorded in the compiled binary ++// torch.__build__.show() + #define CAFFE2_BUILD_STRINGS { \ + {"TORCH_VERSION", "${TORCH_VERSION}"}, \ + {"CXX_COMPILER", "${CMAKE_CXX_COMPILER}"}, \ +@@ -68,5 +68,5 @@ + {"USE_NVTX", "${CAFFE2_USE_NVTX}"}, \ + {"USE_ITT", "${CAFFE2_USE_ITT}"}, \ + {"USE_TRT", "${CAFFE2_USE_TRT}"}, \ +- {"TORCH_DISABLE_GPU_ASSERTS", "${TORCH_DISABLE_GPU_ASSERTS}"}, \ ++ {"USE_ROCM_KERNEL_ASSERT", "${USE_ROCM_KERNEL_ASSERT}"}, \ + } +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index 4ba9bd44f505f6b..acc95842b63149d 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1192,16 +1192,6 @@ if(ANDROID) + list(APPEND Caffe2_DEPENDENCY_LIBS log) + endif() + +-# ---[ Kernel asserts +-# Kernel asserts are enabled by default for CUDA and disabled for ROCm. +-# For ROCm, it can be enabled by setting ROCM_FORCE_ENABLE_GPU_ASSERTS +-if(USE_ROCM AND ROCM_FORCE_ENABLE_GPU_ASSERTS) +- message(STATUS "Forcefully enabling kernel asserts on ROCM") +-elseif(USE_ROCM AND NOT ROCM_FORCE_ENABLE_GPU_ASSERTS) +- message(STATUS "Disabling kernel asserts for ROCm") +- caffe2_update_option(TORCH_DISABLE_GPU_ASSERTS ON) +-endif() +- + # ---[ LLVM + if(USE_LLVM) + message(STATUS "Looking for LLVM in ${USE_LLVM}") +@@ -1249,6 +1239,7 @@ if(USE_ROCM) + caffe2_update_option(USE_SYSTEM_NCCL ON) + endif() + ++ + list(APPEND HIP_CXX_FLAGS -fPIC) + list(APPEND HIP_CXX_FLAGS -D__HIP_PLATFORM_AMD__=1) + list(APPEND HIP_CXX_FLAGS -DCUDA_HAS_FP16=1) +@@ -1291,6 +1282,15 @@ if(USE_ROCM) + list(APPEND Caffe2_PUBLIC_HIP_DEPENDENCY_LIBS + roc::hipblas hip::hipfft hip::hiprand roc::hipsparse roc::hipsolver) + ++ # ---[ Kernel asserts ++ # Kernel asserts is disabled for ROCm by default. ++ # It can be turned on by turning on the env USE_ROCM_KERNEL_ASSERT to the build system. 
++ if(USE_ROCM_KERNEL_ASSERT) ++ message(STATUS "Enabling Kernel Assert for ROCm") ++ else() ++ message(STATUS "Disabling Kernel Assert for ROCm") ++ endif() ++ + else() + caffe2_update_option(USE_ROCM OFF) + endif() +diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake +index 0cb9aef3e621dc0..9c05aac28be8429 100644 +--- a/cmake/Summary.cmake ++++ b/cmake/Summary.cmake +@@ -198,5 +198,5 @@ function(caffe2_print_configuration_summary) + # coreml + message(STATUS " USE_COREML_DELEGATE : ${USE_COREML_DELEGATE}") + message(STATUS " BUILD_LAZY_TS_BACKEND : ${BUILD_LAZY_TS_BACKEND}") +- message(STATUS " TORCH_DISABLE_GPU_ASSERTS : ${TORCH_DISABLE_GPU_ASSERTS}") ++ message(STATUS " USE_ROCM_KERNEL_ASSERT : ${USE_ROCM_KERNEL_ASSERT}") + endfunction() +diff --git a/setup.py b/setup.py +index 5e59c4a0986a17f..86cad767c120665 100644 +--- a/setup.py ++++ b/setup.py +@@ -160,6 +160,9 @@ + # USE_ZSTD + # Enables use of ZSTD, if the libraries are found + # ++# USE_ROCM_KERNEL_ASSERT=1 ++# Enable kernel assert in ROCm platform ++# + # Environment variables we respect (these environment variables are + # conventional and are often understood/set by other software.) 
+ # diff --git a/packages/py/pytorch/files/rocm-6.0.patch b/packages/py/pytorch/files/rocm-6.0.patch new file mode 100644 index 000000000000..aa70bcb89fc6 --- /dev/null +++ b/packages/py/pytorch/files/rocm-6.0.patch @@ -0,0 +1,57 @@ +From 602abf6b55cb11c3dce0c046b3cfbc417b5080d6 Mon Sep 17 00:00:00 2001 +From: Jeff Daily +Date: Wed, 20 Dec 2023 20:19:25 +0000 +Subject: [PATCH] [ROCm] more 6.0 changes (#115946) + +Pull Request resolved: https://github.com/pytorch/pytorch/pull/115946 +Approved by: https://github.com/pruthvistony, https://github.com/huydhn, https://github.com/malfet +--- + aten/src/ATen/cuda/detail/CUDAHooks.cpp | 2 +- + caffe2/CMakeLists.txt | 2 +- + caffe2/core/common_gpu.h | 2 +- + caffe2/operators/elementwise_ops.cu | 2 ++ + cmake/public/LoadHIP.cmake | 3 +-- + 5 files changed, 6 insertions(+), 5 deletions(-) + +diff --git a/aten/src/ATen/cuda/detail/CUDAHooks.cpp b/aten/src/ATen/cuda/detail/CUDAHooks.cpp +index acb9b1931f045e0..24c29a638154499 100644 +--- a/aten/src/ATen/cuda/detail/CUDAHooks.cpp ++++ b/aten/src/ATen/cuda/detail/CUDAHooks.cpp +@@ -151,7 +151,7 @@ bool CUDAHooks::isPinnedPtr(const void* data) const { + return false; + } + #endif +-#if !defined(USE_ROCM) ++#if !defined(USE_ROCM) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) + return attr.type == cudaMemoryTypeHost; + #else + return attr.memoryType == cudaMemoryTypeHost; +diff --git a/caffe2/core/common_gpu.h b/caffe2/core/common_gpu.h +index af1cc891e567eff..bbf25a51352175b 100644 +--- a/caffe2/core/common_gpu.h ++++ b/caffe2/core/common_gpu.h +@@ -86,7 +86,7 @@ namespace caffe2 { + class TensorCoreEngine {}; + #endif // USE_ROCM + +-#if !defined(USE_ROCM) ++#if !defined(USE_ROCM) || (defined(USE_ROCM) && ROCM_VERSION >= 50700) + #define CAFFE2_CUDA_PTRATTR_MEMTYPE type + #else + #define CAFFE2_CUDA_PTRATTR_MEMTYPE memoryType +diff --git a/caffe2/operators/elementwise_ops.cu b/caffe2/operators/elementwise_ops.cu +index b78b94c52147806..2237f2015feba2d 100644 +--- 
a/caffe2/operators/elementwise_ops.cu ++++ b/caffe2/operators/elementwise_ops.cu +@@ -13,8 +13,10 @@ + // until we use hipblas v2 + // hipify correctly maps things like CUDA_R_16F to HIP_R_16F, + // however hipblas v1 is still using its custom type ++#ifndef HIPBLAS_V2 + #define HIP_R_16F HIPBLAS_R_16F + #define HIP_R_32F HIPBLAS_R_32F ++#endif + #endif // USE_ROCM + + namespace caffe2 { + diff --git a/packages/py/pytorch/package.yml b/packages/py/pytorch/package.yml index 3bcafd3a01b0..1857e2205c41 100644 --- a/packages/py/pytorch/package.yml +++ b/packages/py/pytorch/package.yml @@ -1,8 +1,8 @@ name : pytorch -version : 2.1.1 -release : 21 +version : 2.1.2 +release : 22 source : - - https://github.com/pytorch/pytorch/releases/download/v2.1.1/pytorch-v2.1.1.tar.gz : 1aa2aacced3c60c935d05f6d80232f8e99cdcb09eb51ceea697857b90c98d3fa + - https://github.com/pytorch/pytorch/releases/download/v2.1.2/pytorch-v2.1.2.tar.gz : 85effbcce037bffa290aea775c9a4bad5f769cb229583450c40055501ee1acd7 homepage : https://pytorch.org/ license : BSD-3-Clause component : @@ -21,6 +21,7 @@ patterns : - /usr/include avx2 : yes clang : yes +debug : no builddeps : - pkgconfig(eigen3) - pkgconfig(fftw3) @@ -53,13 +54,13 @@ builddeps : - pkgconfig(libhsakmt) - rocm-core-devel - rocm-cmake - - rocm-hip-devel + - rocm-hip - hipblas-devel - hipcub - hipfft-devel - hipsolver-devel - hipsparse-devel - - hipmagma-devel + # - hipmagma-devel - miopen-devel - miopengemm-devel - rccl-devel @@ -91,13 +92,13 @@ rundeps : - libtorch # ROCm packages - rocm-core-devel - - rocm-hip-devel + - rocm-hip - hipblas-devel - hipcub - hipfft-devel - hipsolver-devel - hipsparse-devel - - hipmagma-devel + # - hipmagma-devel - miopen-devel - miopengemm-devel - rccl-devel @@ -134,7 +135,7 @@ environment: | USE_LEVELDB=1 \ USE_LAPACK=1 \ USE_LMDB=1 \ - USE_MAGMA=1 \ + USE_MAGMA=0 \ USE_MKLDNN=0 \ USE_BLAS=1 \ USE_OPENCV=1 \ @@ -149,10 +150,15 @@ environment: | USE_SYSTEM_PYBIND11=1 \ USE_SYSTEM_TBB=1 \ USE_SYSTEM_ZSTD=1 \ - 
REL_WITH_DEB_INFO=1 \ + REL_WITH_DEB_INFO=0 \ export ROCM_PATH=/usr export HIP_PATH=/usr + export HIP_COMPILER=clang + export HIP_RUNTIME=amd + export HIP_ROCCLR_HOME=/usr/lib64/llvm-rocm + export HIP_CLANG_PATH=/usr/lib64/llvm-rocm/bin + export HIP_CXX_COMPILER=/usr/bin/hipcc export DEVICE_LIB_PATH="/usr/lib64/amdgcn/bitcode" export PYTORCH_ROCM_ARCH="gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" @@ -161,12 +167,19 @@ environment: | # Cannot compile C code with clang due to https://github.com/pytorch/pytorch/issues/103222 export CC=gcc + export CFLAGS="${CFLAGS/-g2 /}" + export CXXFLAGS="${CXXFLAGS/-g2 /}" export CXXFLAGS="${CXXFLAGS/-D_FORTIFY_SOURCE=2 -fstack-protector-strong --param=ssp-buffer-size=32/-fcf-protection=none}" + # Too noisy + export CFLAGS="$CFLAGS -w" + export CXXFLAGS="$CXXFLAGS -w" + export HIPCC_COMPILE_FLAGS_APPEND="-w" # Set it in case your computer starts chocking horribly # export MAX_JOBS=$((%YJOBS%/4)) + export MAX_JOBS=%YJOBS% # For packagers. The default 5G is barely enough for a single build. - # export CCACHE_MAXSIZE=20G + export CCACHE_MAXSIZE=20G setup : | # Make sure lapack is detected. sed -i 's|(BLAS_INFO STREQUAL "generic")|TRUE|' cmake/Modules/FindLAPACK.cmake @@ -180,6 +193,28 @@ setup : | # They forgot to include iostream and some other standard libraries sed -i 's|||' binaries/dump_operator_names.cc + # Backports from PyTorch 2.2.0 + %patch -p1 -i $pkgfiles/enable-hipsparse-descriptors.patch + %patch -p1 -i $pkgfiles/fully-disable-rocm-kernel-asserts.patch + %patch -p1 -i $pkgfiles/rocm-6.0.patch + + # The CMake option HIP_CXX_COMPILER is set by default to CMAKE_CXX_COMPILER + # by HIP, so we have to sneak this option into CMake using PyTorch's existing + # code for setting the CUDA compiler. This will also work when PyTorch + # decides to switch to native HIP support in CMake. 
+ sed -i 's|CUDA_NVCC_EXECUTABLE|HIP_CXX_COMPILER|' tools/setup_helpers/cmake.py + sed -i 's|CMAKE_CUDA_COMPILER|HIP_CXX_COMPILER|' tools/setup_helpers/cmake.py + sed -i 's|CUDACXX|HIPCXX|' tools/setup_helpers/cmake.py + + # __HIP_PLATFORM_HCC__ no longer recognized in ROCm 6 + sed -i 's|__HIP_PLATFORM_HCC__|__HIP_PLATFORM_AMD__|g' \ + cmake/Dependencies.cmake \ + torch/CMakeLists.txt \ + torch/utils/cpp_extension.py \ + caffe2/CMakeLists.txt \ + third_party/nvfuser/CMakeLists.txt \ + aten/src/ATen/native/cuda/layer_norm_kernel.cu \ + python3 ./tools/amd_build/build_amd.py # From Arch. diff --git a/packages/py/pytorch/pspec_x86_64.xml b/packages/py/pytorch/pspec_x86_64.xml index be3540b83f1b..8378d23c8786 100644 --- a/packages/py/pytorch/pspec_x86_64.xml +++ b/packages/py/pytorch/pspec_x86_64.xml @@ -20,7 +20,7 @@ programming.python - libtorch + libtorch /usr/bin/convert-caffe2-to-onnx @@ -1550,12 +1550,12 @@ /usr/lib/python3.10/site-packages/nvfuser/_C.cpython-310-x86_64-linux-gnu.so /usr/lib/python3.10/site-packages/nvfuser/__init__.py /usr/lib/python3.10/site-packages/nvfuser/__pycache__/__init__.cpython-310.pyc - /usr/lib/python3.10/site-packages/torch-2.1.1+gitunknown-py3.10.egg-info/PKG-INFO - /usr/lib/python3.10/site-packages/torch-2.1.1+gitunknown-py3.10.egg-info/SOURCES.txt - /usr/lib/python3.10/site-packages/torch-2.1.1+gitunknown-py3.10.egg-info/dependency_links.txt - /usr/lib/python3.10/site-packages/torch-2.1.1+gitunknown-py3.10.egg-info/entry_points.txt - /usr/lib/python3.10/site-packages/torch-2.1.1+gitunknown-py3.10.egg-info/requires.txt - /usr/lib/python3.10/site-packages/torch-2.1.1+gitunknown-py3.10.egg-info/top_level.txt + /usr/lib/python3.10/site-packages/torch-2.1.2+gitunknown-py3.10.egg-info/PKG-INFO + /usr/lib/python3.10/site-packages/torch-2.1.2+gitunknown-py3.10.egg-info/SOURCES.txt + /usr/lib/python3.10/site-packages/torch-2.1.2+gitunknown-py3.10.egg-info/dependency_links.txt + 
/usr/lib/python3.10/site-packages/torch-2.1.2+gitunknown-py3.10.egg-info/entry_points.txt + /usr/lib/python3.10/site-packages/torch-2.1.2+gitunknown-py3.10.egg-info/requires.txt + /usr/lib/python3.10/site-packages/torch-2.1.2+gitunknown-py3.10.egg-info/top_level.txt /usr/lib/python3.10/site-packages/torch/_C.cpython-310-x86_64-linux-gnu.so /usr/lib/python3.10/site-packages/torch/_C/_VariableFunctions.pyi /usr/lib/python3.10/site-packages/torch/_C/__init__.pyi @@ -5020,7 +5020,7 @@ PyTorch is an optimized tensor library for deep learning using GPUs and CPUs. - libtorch + libtorch /usr/include/ATen/ATen.h @@ -14110,7 +14110,7 @@ /usr/include/torch/script.h /usr/lib64/cmake/ATen/ATenConfig.cmake /usr/lib64/cmake/Caffe2/Caffe2Config.cmake - /usr/lib64/cmake/Caffe2/Caffe2Targets-relwithdebinfo.cmake + /usr/lib64/cmake/Caffe2/Caffe2Targets-release.cmake /usr/lib64/cmake/Caffe2/Caffe2Targets.cmake /usr/lib64/cmake/Caffe2/FindCUDAToolkit.cmake /usr/lib64/cmake/Caffe2/FindCUSPARSELT.cmake @@ -14132,7 +14132,7 @@ /usr/lib64/cmake/Caffe2/public/mkldnn.cmake /usr/lib64/cmake/Caffe2/public/protobuf.cmake /usr/lib64/cmake/Caffe2/public/utils.cmake - /usr/lib64/cmake/Tensorpipe/TensorpipeTargets-relwithdebinfo.cmake + /usr/lib64/cmake/Tensorpipe/TensorpipeTargets-release.cmake /usr/lib64/cmake/Tensorpipe/TensorpipeTargets.cmake /usr/lib64/cmake/Torch/TorchConfig.cmake /usr/lib64/cmake/Torch/TorchConfigVersion.cmake @@ -14142,9 +14142,9 @@ - - 2023-12-07 - 2.1.1 + + 2024-01-05 + 2.1.2 Packaging update Gavin Zhao me@gzgz.dev