Skip to content

Commit

Permalink
Bump MIOpen version to 3.0.1 and update CI docker (partial cherry-pic…
Browse files Browse the repository at this point in the history
…k of ROCm#2519)

Differences from ROCm#2519:
- rocm_setup_version(VERSION 3.1.0) -> rocm_setup_version(VERSION 3.0.1)
- Changes of requirements.txt REVERTED

(cherry picked from commit 7da72bc and EDITED)

# RESOLVED Conflicts:
#	Dockerfile
#	dev-requirements.txt
#	requirements.txt -- REVERTED
  • Loading branch information
atamazov committed Dec 27, 2023
1 parent c4a091f commit 2c9fe96
Show file tree
Hide file tree
Showing 17 changed files with 76 additions and 51 deletions.
15 changes: 14 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ if(NOT WIN32 AND NOT APPLE)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s")
endif()

rocm_setup_version(VERSION 3.00.0)
rocm_setup_version(VERSION 3.0.1)

list( APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake )
include(TargetFlags)
Expand Down Expand Up @@ -761,6 +761,19 @@ enable_cppcheck(
knownConditionTrueFalse
shadowFunction
moduloofone
###################################################################
# TODO Code Quality WORKAROUND ROCm 6.0 &&
# Ubuntu 22.04 && cppcheck 2.12.1 update
###################################################################
duplInheritedMember
constParameterCallback
constParameterReference
constParameterPointer
constVariableReference
constVariablePointer
useStlAlgorithm
uselessOverride
unusedScopedObject
FORCE
SOURCES
addkernels/
Expand Down
24 changes: 15 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ubuntu:20.04 as miopen
FROM ubuntu:22.04 as miopen
ARG DEBIAN_FRONTEND=noninteractive

# Support multiarch
Expand All @@ -18,17 +18,17 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn
RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg

RUN wget https://repo.radeon.com/amdgpu-install/5.7/ubuntu/focal/amdgpu-install_5.7.50700-1_all.deb --no-check-certificate
RUN wget https://repo.radeon.com/amdgpu-install/6.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
./amdgpu-install_5.7.50700-1_all.deb
./amdgpu-install_6.0.60000-1_all.deb

# Add rocm repository
RUN export ROCM_APT_VER=5.7;\
RUN export ROCM_APT_VER=6.0;\
echo $ROCM_APT_VER &&\
sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list' &&\
sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER focal main > /etc/apt/sources.list.d/rocm.list'
RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\
sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list'
RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu jammy main universe | tee -a /etc/apt/sources.list"

RUN amdgpu-install -y --usecase=rocm --no-dkms

Expand Down Expand Up @@ -94,11 +94,17 @@ RUN rm -rf /tmp/ccache* && mkdir /tmp/ccache && wget https://github.com/ccache/c
cd /tmp/ccache-${CCACHE_COMMIT}/build && \
cmake -DZSTD_FROM_INTERNET=ON -DHIREDIS_FROM_INTERNET=ON .. && make -j install && rm -rf /tmp/*
RUN ccache -s

# purge existing composable kernel installed with ROCm
# hence cannot use autoremove since it will remove more components
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get purge -y --allow-unauthenticated \
composablekernel-dev
ARG COMPILER_LAUNCHER=""
RUN if [ "$USE_FIN" = "ON" ]; then \
rbuild prepare -s fin -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
rbuild prepare -s fin -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
else \
rbuild prepare -s develop -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
rbuild prepare -s develop -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
fi

RUN ccache -s
Expand Down
2 changes: 1 addition & 1 deletion dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
ROCmSoftwarePlatform/rocm-recipes
-f requirements.txt
danmar/cppcheck@2.9
danmar/cppcheck@2.12.1
2 changes: 1 addition & 1 deletion docs/DebugAndLogging.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ Direct Solutions:
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD11X11` - `ConvOclDirectFwd11x11`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWDGEN` - `ConvOclDirectFwdGen`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD` - `ConvOclDirectFwd`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd1x1`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2` - `ConvOclBwdWrW2<n>` (where n = `{1,2,4,8,16}`), and `ConvOclBwdWrW2NonTunable`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW53` - `ConvOclBwdWrW53`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1` - `ConvOclBwdWrW1x1`
Expand Down
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -778,7 +778,7 @@ elseif(MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN_BACKEND STREQUAL "HIP")
endif()
if(ENABLE_HIP_WORKAROUNDS)
# Workaround hip not setting its usage requirements correctly
target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_HCC__=1 )
target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_AMD__=1 )
endif()
# This is helpful for the tests
target_link_libraries( MIOpen INTERFACE $<BUILD_INTERFACE:hip::device> )
Expand Down
6 changes: 4 additions & 2 deletions src/comgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1293,8 +1293,10 @@ void BuildHip(const std::string& name,
miopen::SplitSpaceSeparated(options, miopen::comgr::compiler::lc::GetOptionsNoSplit());
compiler::lc::RemoveOptionsUnwanted(opts);
opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE"); // Workaround for SWDEV-308073
opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround?
opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround?
#if HIP_PACKAGE_VERSION_FLAT < 6000023494ULL
opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround?
#endif
opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround?
#if ROCM_FEATURE_LLVM_AMDGCN_BUFFER_ATOMIC_FADD_F32_RETURNS_FLOAT
if(miopen::solver::support_amd_buffer_atomic_fadd(target.Name()))
opts.push_back("-DCK_AMD_BUFFER_ATOMIC_FADD_RETURNS_FLOAT=1");
Expand Down
2 changes: 1 addition & 1 deletion src/composable_kernel/.clang-tidy
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CheckOptions:
- key: bugprone-reserved-identifier.AllowedIdentifiers
value: '__HIP_PLATFORM_HCC__;__HIP_ROCclr__'
value: '__HIP_PLATFORM_AMD__;__HIP_ROCclr__'
2 changes: 1 addition & 1 deletion src/composable_kernel/cmake/ClangTidy.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ function(clang_tidy_check TARGET)
add_custom_target(${tidy_target}
# for some targets clang-tidy not able to get information from .clang-tidy
DEPENDS ${SOURCE}
COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_HCC__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml"
COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_AMD__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml"
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMENT "clang-tidy: Running clang-tidy on target ${SOURCE}..."
)
Expand Down
10 changes: 5 additions & 5 deletions src/composable_kernel/external/rocm/include/bfloat16_dev.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
extern "C" {
#endif

#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define EXECUTION_SPECIFIER __device__
#else
#define EXECUTION_SPECIFIER
Expand All @@ -43,7 +43,7 @@ typedef union

// Composable kernels are written in HIP language. The language doesnt support
// ushort2.hi or ushort2.low.
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
ushort ushortvec[2];
#endif // MIOPEN_BACKEND_HIP
float f32;
Expand All @@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val)
{
cvt_bf16_fp32_t target_val;

#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
target_val.ushortx2 = make_ushort2(0, src_val);
#else
target_val.ushortx2 = (ushort2)(0, src_val);
Expand Down Expand Up @@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
// When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
// incrementing it causes it to become an exponent of 0xFF and a mantissa
// of 0x00, which is Inf, the next higher value to the unrounded value.
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1));
#else
target_val.u32 +=
Expand All @@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
#endif // MIOPEN_USE_RNE_BFLOAT16
}

#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
return target_val.ushortvec[1];
#else
return target_val.ushortx2.hi;
Expand Down
2 changes: 1 addition & 1 deletion src/convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor&
}
}

std::size_t out_c;
std::size_t out_c = 0;
std::vector<std::size_t> out_lens(spatial_dim + 2);

auto out_spatial = boost::adaptors::slice(out_lens, 2, 2 + spatial_dim);
Expand Down
10 changes: 5 additions & 5 deletions src/kernels/bfloat16_dev.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
extern "C" {
#endif

#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define EXECUTION_SPECIFIER __device__
#else
#define EXECUTION_SPECIFIER
Expand All @@ -43,7 +43,7 @@ typedef union cvt_bf16_fp32

// Composable kernels are written in HIP language. The language doesnt support
// ushort2.hi or ushort2.low.
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
ushort ushortvec[2];
#endif // MIOPEN_BACKEND_HIP
float f32;
Expand All @@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val)
{
cvt_bf16_fp32_t target_val;

#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
target_val.ushortx2 = make_ushort2(0, src_val);
#else
target_val.ushortx2 = (ushort2)(0, src_val);
Expand Down Expand Up @@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
// When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
// incrementing it causes it to become an exponent of 0xFF and a mantissa
// of 0x00, which is Inf, the next higher value to the unrounded value.
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1));
#else
target_val.u32 +=
Expand All @@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
#endif // MIOPEN_USE_RNE_BFLOAT16
}

#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
return target_val.ushortvec[1];
#else
return target_val.ushortx2.hi;
Expand Down
36 changes: 18 additions & 18 deletions src/kernels/float_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#define FOUR 4
#define EIGHT 8
#if MIOPEN_USE_FP8 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT hip_f8<miopen_f8::hip_f8_type::fp8>
#define FLOAT_ACCUM float
// HIP implements the correct operators for conversion
Expand All @@ -58,7 +58,7 @@
#endif // MIOPEN_USE_FP8

#if MIOPEN_USE_BFP8 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT hip_f8<miopen_f8::hip_f8_type::bf8>
#define FLOAT_ACCUM float
#else
Expand All @@ -79,7 +79,7 @@
// #endif
#endif // MIOPEN_USE_BFP8

#ifndef __HIP_PLATFORM_HCC__
#ifndef __HIP_PLATFORM_AMD__
#define _FLOAT2 PPCAT(_FLOAT, TWO)
#define _FLOAT4 PPCAT(_FLOAT, FOUR)
#define _FLOAT8 PPCAT(_FLOAT, EIGHT)
Expand All @@ -99,19 +99,19 @@
#endif

#if MIOPEN_USE_DOUBLE_ACCUM
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT_ACCUM double
#else
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#define _FLOAT_ACCUM double
#endif // __HIP_PLATFORM_HCC__
#endif // __HIP_PLATFORM_AMD__
#define MAX_VAL_ACCUM DBL_MAX
#else // MIOPEN_USE_DOUBLE_ACCUM
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT_ACCUM float
#else
#define _FLOAT_ACCUM float
#endif // __HIP_PLATFORM_HCC__
#endif // __HIP_PLATFORM_AMD__
#ifndef FLT_MAX
#define MAX_VAL_ACCUM 3.402823466e+38F
#else
Expand All @@ -120,12 +120,12 @@
#endif // MIOPEN_USE_DOUBLE_ACCUM

#if MIOPEN_USE_FP16 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT _Float16
#else // __HIP_PLATFORM_HCC__
#else // __HIP_PLATFORM_AMD__
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#define _FLOAT half
#endif // __HIP_PLATFORM_HCC__
#endif // __HIP_PLATFORM_AMD__
#define SIZEOF_FLOAT 2
// Max value for the main datatype
#ifndef HALF_MAX
Expand All @@ -136,11 +136,11 @@
#endif // MIOPEN_USE_FP16

#if MIOPEN_USE_FP32 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT float
#else
#define _FLOAT float
#endif // __HIP_PLATFORM_HCC__
#endif // __HIP_PLATFORM_AMD__
#define SIZEOF_FLOAT 4
// Max value for the main datatype
#ifndef FLT_MAX
Expand All @@ -151,7 +151,7 @@
#endif // MIOPEN_USE_FP32

#if MIOPEN_USE_BFP16 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT ushort
#else
#define _FLOAT ushort
Expand All @@ -162,7 +162,7 @@
#endif // MIOPEN_USE_BFP16

#if MIOPEN_USE_FP16 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define CVT_FLOAT2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
#define CVT_ACCUM2FLOAT(x) (static_cast<FLOAT>(x))
#define CVT_INTEGRAL2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
Expand All @@ -188,7 +188,7 @@
/// refactoring should be considered as nontrivial and requires
/// a separate PR. Let's keep this historical stuff for now.
/// --atamazov 30.08.2023
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define CVT_FLOAT2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
#define CVT_ACCUM2FLOAT(x) (static_cast<FLOAT>(x))
#define CVT_INTEGRAL2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
Expand All @@ -202,7 +202,7 @@
#endif // MIOPEN_USE_FP32

#if MIOPEN_USE_BFP16 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define CVT_FLOAT2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED
#define CVT_ACCUM2FLOAT(x) MIOPEN_ERROR_NOT_IMLEMENTED
#define CVT_INTEGRAL2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED
Expand Down Expand Up @@ -232,7 +232,7 @@
#endif

#if MIOPEN_USE_NATIVE_DATATYPE_ACCUM
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#undef FLOAT_ACCUM
#define FLOAT_ACCUM MIOPEN_ERROR_NOT_IMLEMENTED
#else
Expand All @@ -250,7 +250,7 @@
#define CVT_FP32_2ACCUM(x) (CVT_FP32_2FLOAT(x))

#undef CVT_INTEGRAL2ACCUM
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define CVT_INTEGRAL2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED
#else
#if MIOPEN_USE_BFP16 == 1
Expand Down
2 changes: 1 addition & 1 deletion src/kernels/hip_f8_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
// #include <half.hpp>
namespace miopen_hip_f8_impl {

#ifndef __HIP_PLATFORM_HCC__
#ifndef __HIP_PLATFORM_AMD__
using hip_bfloat16 = bfloat16;
using half = half_float::half;
#endif
Expand Down
2 changes: 1 addition & 1 deletion src/kernels/hip_float8.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
#endif

// FP8 header version 0.4, 2021/05/11
#if defined __HIP_PLATFORM_HCC__ && MIOPEN_ENABLE_F8_DEVICE_CODE
#if defined __HIP_PLATFORM_AMD__ && MIOPEN_ENABLE_F8_DEVICE_CODE
// MIOpen by default does not have device code in the regular compilation paths,
// therefore, when this file is used from the host side, compilation takes much
// longer. By guarding the __device__ directive we can control that such compilation
Expand Down
2 changes: 1 addition & 1 deletion src/rnn_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ static void LogCmdRNN(const miopenTensorDescriptor_t* xDesc,
const int seqLength,
const RNNDir_t dir)
{
if(miopen::IsLoggingCmd())
if(miopen::IsLoggingCmd() && seqLength > 0)
{
std::string mode;
miopenRNNMode_t rnnMode = miopen::deref(rnnDesc).rnnMode;
Expand Down
Loading

0 comments on commit 2c9fe96

Please sign in to comment.