Skip to content

Commit

Permalink
Raise MSVC warning level from /W3 to /W4
Browse files Browse the repository at this point in the history
This patch increases the warning level when using the MSVC compiler from
`/W3` to `/W4` and fixes the issues found. Four warnings introduced by
`/W4` are disabled, all related to variable name shadowing, as they are
overly prescriptive to valid code.
  • Loading branch information
kbenzie committed Oct 22, 2024
1 parent d8cc532 commit da462e9
Show file tree
Hide file tree
Showing 63 changed files with 243 additions and 194 deletions.
15 changes: 11 additions & 4 deletions cmake/helpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -98,18 +98,25 @@ function(add_ur_target_compile_options name)
elseif(MSVC)
target_compile_options(${name} PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/MP> # clang-cl.exe does not support /MP
/W3
/W4
/wd4456 # Disable: declaration of 'identifier' hides previous local declaration
/wd4457 # Disable: declaration of 'identifier' hides function parameter
/wd4458 # Disable: declaration of 'identifier' hides class member
/wd4459 # Disable: declaration of 'identifier' hides global declaration
/MD$<$<CONFIG:Debug>:d>
/GS
/DWIN32_LEAN_AND_MEAN
/DNOMINMAX
)

if(UR_DEVELOPER_MODE)
target_compile_definitions(${name} PRIVATE
# _CRT_SECURE_NO_WARNINGS used mainly because of getenv
# C4267: The compiler detected a conversion from size_t to a smaller type.
_CRT_SECURE_NO_WARNINGS
)

if(UR_DEVELOPER_MODE)
target_compile_options(${name} PRIVATE
/WX /GS /D_CRT_SECURE_NO_WARNINGS /wd4267
/WX /GS
)
endif()
endif()
Expand Down
7 changes: 7 additions & 0 deletions examples/collector/collector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,14 @@
#include <string_view>

#include "ur_api.h"

#ifdef _MSC_VER
#pragma warning(disable : 4245)
#endif
#include "xpti/xpti_trace_framework.h"
#ifdef _MSC_VER
#pragma warning(default : 4245)
#endif

constexpr uint16_t TRACE_FN_BEGIN =
static_cast<uint16_t>(xpti::trace_point_type_t::function_with_args_begin);
Expand Down
2 changes: 1 addition & 1 deletion include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ typedef struct ur_physical_mem_handle_t_ *ur_physical_mem_handle_t;
///////////////////////////////////////////////////////////////////////////////
#ifndef UR_BIT
/// @brief Generic macro for enumerator bit masks
#define UR_BIT(_i) (1 << _i)
#define UR_BIT(_i) (1U << _i)
#endif // UR_BIT

///////////////////////////////////////////////////////////////////////////////
Expand Down
2 changes: 1 addition & 1 deletion scripts/core/common.yml
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ name: "$x_physical_mem_handle_t"
type: macro
desc: "Generic macro for enumerator bit masks"
name: "$X_BIT( _i )"
value: "( 1 << _i )"
value: "( 1U << _i )"
--- #--------------------------------------------------------------------------
type: enum
desc: "Defines Return/Error codes"
Expand Down
7 changes: 4 additions & 3 deletions source/adapters/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,16 @@ if (UR_ENABLE_TRACING)
get_target_property(XPTI_SRC_DIR xpti SOURCE_DIR)
set(XPTI_PROXY_SRC "${XPTI_SRC_DIR}/xpti_proxy.cpp")
endif()
target_compile_definitions(${TARGET_NAME} PRIVATE
add_library(cuda-xpti-proxy STATIC ${XPTI_PROXY_SRC})
target_compile_definitions(cuda-xpti-proxy PRIVATE
XPTI_ENABLE_INSTRUMENTATION
XPTI_STATIC_LIBRARY
)
target_include_directories(${TARGET_NAME} PRIVATE
target_include_directories(cuda-xpti-proxy PRIVATE
${XPTI_INCLUDES}
${CUDA_CUPTI_INCLUDE_DIR}
)
target_sources(${TARGET_NAME} PRIVATE ${XPTI_PROXY_SRC})
target_link_libraries(${TARGET_NAME} PRIVATE cuda-xpti-proxy)
endif()

if (CUDA_cupti_LIBRARY)
Expand Down
26 changes: 13 additions & 13 deletions source/adapters/cuda/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ static ur_result_t enqueueCommandBufferFillHelper(
if ((PatternSize == 1) || (PatternSize == 2) || (PatternSize == 4)) {
CUDA_MEMSET_NODE_PARAMS NodeParams = {};
NodeParams.dst = DstPtr;
NodeParams.elementSize = PatternSize;
NodeParams.elementSize = static_cast<unsigned int>(PatternSize);
NodeParams.height = N;
NodeParams.pitch = PatternSize;
NodeParams.width = 1;
Expand Down Expand Up @@ -508,12 +508,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
auto &ArgIndices = hKernel->getArgIndices();
CUDA_KERNEL_NODE_PARAMS NodeParams = {};
NodeParams.func = CuFunc;
NodeParams.gridDimX = BlocksPerGrid[0];
NodeParams.gridDimY = BlocksPerGrid[1];
NodeParams.gridDimZ = BlocksPerGrid[2];
NodeParams.blockDimX = ThreadsPerBlock[0];
NodeParams.blockDimY = ThreadsPerBlock[1];
NodeParams.blockDimZ = ThreadsPerBlock[2];
NodeParams.gridDimX = static_cast<unsigned int>(BlocksPerGrid[0]);
NodeParams.gridDimY = static_cast<unsigned int>(BlocksPerGrid[1]);
NodeParams.gridDimZ = static_cast<unsigned int>(BlocksPerGrid[2]);
NodeParams.blockDimX = static_cast<unsigned int>(ThreadsPerBlock[0]);
NodeParams.blockDimY = static_cast<unsigned int>(ThreadsPerBlock[1]);
NodeParams.blockDimZ = static_cast<unsigned int>(ThreadsPerBlock[2]);
NodeParams.sharedMemBytes = LocalSize;
NodeParams.kernelParams = const_cast<void **>(ArgIndices.data());

Expand Down Expand Up @@ -1391,12 +1391,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
CUDA_KERNEL_NODE_PARAMS &Params = KernelCommandHandle->Params;

Params.func = CuFunc;
Params.gridDimX = BlocksPerGrid[0];
Params.gridDimY = BlocksPerGrid[1];
Params.gridDimZ = BlocksPerGrid[2];
Params.blockDimX = ThreadsPerBlock[0];
Params.blockDimY = ThreadsPerBlock[1];
Params.blockDimZ = ThreadsPerBlock[2];
Params.gridDimX = static_cast<unsigned int>(BlocksPerGrid[0]);
Params.gridDimY = static_cast<unsigned int>(BlocksPerGrid[1]);
Params.gridDimZ = static_cast<unsigned int>(BlocksPerGrid[2]);
Params.blockDimX = static_cast<unsigned int>(ThreadsPerBlock[0]);
Params.blockDimY = static_cast<unsigned int>(ThreadsPerBlock[1]);
Params.blockDimZ = static_cast<unsigned int>(ThreadsPerBlock[2]);
Params.sharedMemBytes = KernelCommandHandle->Kernel->getLocalSize();
Params.kernelParams =
const_cast<void **>(KernelCommandHandle->Kernel->getArgIndices().data());
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/cuda/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1150,7 +1150,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform,

try {
if (pNumDevices) {
*pNumDevices = NumDevices;
*pNumDevices = static_cast<uint32_t>(NumDevices);
}

if (ReturnDevices && phDevices) {
Expand Down Expand Up @@ -1233,7 +1233,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
ur_result_t UR_APICALL urDeviceGetGlobalTimestamps(ur_device_handle_t hDevice,
uint64_t *pDeviceTimestamp,
uint64_t *pHostTimestamp) {
CUevent Event;
CUevent Event{};
ScopedContext Active(hDevice);

if (pDeviceTimestamp) {
Expand Down
50 changes: 28 additions & 22 deletions source/adapters/cuda/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ void guessLocalWorkSize(ur_device_handle_t Device, size_t *ThreadsPerBlock,
int MinGrid, MaxBlockSize;
UR_CHECK_ERROR(cuOccupancyMaxPotentialBlockSize(
&MinGrid, &MaxBlockSize, Kernel->get(), NULL, Kernel->getLocalSize(),
MaxBlockDim[0]));
static_cast<int>(MaxBlockDim[0])));

roundToHighestFactorOfGlobalSizeIn3d(ThreadsPerBlock, GlobalSizeNormalized,
MaxBlockDim, MaxBlockSize);
Expand Down Expand Up @@ -208,7 +208,7 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
MaxWorkGroupSize = Device->getMaxWorkGroupSize();

if (ProvidedLocalWorkGroupSize) {
auto IsValid = [&](int Dim) {
auto IsValid = [&](size_t Dim) {
if (ReqdThreadsPerBlock[Dim] != 0 &&
LocalWorkSize[Dim] != ReqdThreadsPerBlock[Dim])
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
Expand All @@ -217,7 +217,8 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
LocalWorkSize[Dim] > MaxThreadsPerBlock[Dim])
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;

if (LocalWorkSize[Dim] > Device->getMaxWorkItemSizes(Dim))
if (LocalWorkSize[Dim] >
Device->getMaxWorkItemSizes(static_cast<int>(Dim)))
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
// Checks that local work sizes are a divisor of the global work sizes
// which includes that the local work sizes are neither larger than
Expand Down Expand Up @@ -481,9 +482,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(

auto &ArgIndices = hKernel->getArgIndices();
UR_CHECK_ERROR(cuLaunchKernel(
CuFunc, BlocksPerGrid[0], BlocksPerGrid[1], BlocksPerGrid[2],
ThreadsPerBlock[0], ThreadsPerBlock[1], ThreadsPerBlock[2], LocalSize,
CuStream, const_cast<void **>(ArgIndices.data()), nullptr));
CuFunc, static_cast<unsigned int>(BlocksPerGrid[0]),
static_cast<unsigned int>(BlocksPerGrid[1]),
static_cast<unsigned int>(BlocksPerGrid[2]),
static_cast<unsigned int>(ThreadsPerBlock[0]),
static_cast<unsigned int>(ThreadsPerBlock[1]),
static_cast<unsigned int>(ThreadsPerBlock[2]), LocalSize, CuStream,
const_cast<void **>(ArgIndices.data()), nullptr));

if (LocalSize != 0)
hKernel->clearLocalSize();
Expand Down Expand Up @@ -649,12 +654,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
auto &ArgIndices = hKernel->getArgIndices();

CUlaunchConfig launch_config;
launch_config.gridDimX = BlocksPerGrid[0];
launch_config.gridDimY = BlocksPerGrid[1];
launch_config.gridDimZ = BlocksPerGrid[2];
launch_config.blockDimX = ThreadsPerBlock[0];
launch_config.blockDimY = ThreadsPerBlock[1];
launch_config.blockDimZ = ThreadsPerBlock[2];
launch_config.gridDimX = static_cast<unsigned int>(BlocksPerGrid[0]);
launch_config.gridDimY = static_cast<unsigned int>(BlocksPerGrid[1]);
launch_config.gridDimZ = static_cast<unsigned int>(BlocksPerGrid[2]);
launch_config.blockDimX = static_cast<unsigned int>(ThreadsPerBlock[0]);
launch_config.blockDimY = static_cast<unsigned int>(ThreadsPerBlock[1]);
launch_config.blockDimZ = static_cast<unsigned int>(ThreadsPerBlock[2]);

launch_config.sharedMemBytes = LocalSize;
launch_config.hStream = CuStream;
Expand Down Expand Up @@ -979,8 +984,9 @@ ur_result_t commonMemSetLargePattern(CUstream Stream, uint32_t PatternSize,
auto OffsetPtr = Ptr + (step * sizeof(uint8_t));

// set all of the pattern chunks
UR_CHECK_ERROR(cuMemsetD2D8Async(OffsetPtr, Pitch, Value, sizeof(uint8_t),
Height, Stream));
UR_CHECK_ERROR(cuMemsetD2D8Async(OffsetPtr, Pitch,
static_cast<unsigned char>(Value),
sizeof(uint8_t), Height, Stream));
}
return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -1031,8 +1037,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill(
break;
}
default: {
UR_CHECK_ERROR(commonMemSetLargePattern(Stream, patternSize, size,
pPattern, DstDevice));
UR_CHECK_ERROR(
commonMemSetLargePattern(Stream, static_cast<uint32_t>(patternSize),
size, pPattern, DstDevice));
break;
}
}
Expand Down Expand Up @@ -1064,7 +1071,6 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR ArrayDesc) {
return 4;
default:
detail::ur::die("Invalid image format.");
return 0;
}
}

Expand Down Expand Up @@ -1168,7 +1174,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead(
CUDA_ARRAY_DESCRIPTOR ArrayDesc;
UR_CHECK_ERROR(cuArrayGetDescriptor(&ArrayDesc, Array));

int ElementByteSize = imageElementByteSize(ArrayDesc);
int ElementByteSize = static_cast<int>(imageElementByteSize(ArrayDesc));

size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels;
size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width;
Expand Down Expand Up @@ -1241,7 +1247,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite(
CUDA_ARRAY_DESCRIPTOR ArrayDesc;
UR_CHECK_ERROR(cuArrayGetDescriptor(&ArrayDesc, Array));

int ElementByteSize = imageElementByteSize(ArrayDesc);
int ElementByteSize = static_cast<int>(imageElementByteSize(ArrayDesc));

size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels;
size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width;
Expand Down Expand Up @@ -1320,7 +1326,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
UR_ASSERT(SrcArrayDesc.NumChannels == DstArrayDesc.NumChannels,
UR_RESULT_ERROR_INVALID_MEM_OBJECT);

int ElementByteSize = imageElementByteSize(SrcArrayDesc);
int ElementByteSize = static_cast<int>(imageElementByteSize(SrcArrayDesc));

size_t DstByteOffsetX =
dstOrigin.x * ElementByteSize * SrcArrayDesc.NumChannels;
Expand Down Expand Up @@ -1505,8 +1511,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
CuStream));
break;
default:
commonMemSetLargePattern(CuStream, patternSize, size, pPattern,
(CUdeviceptr)ptr);
commonMemSetLargePattern(CuStream, static_cast<uint32_t>(patternSize),
size, pPattern, (CUdeviceptr)ptr);
break;
}
if (phEvent) {
Expand Down
5 changes: 3 additions & 2 deletions source/adapters/cuda/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp(
ur_result_t Result = UR_RESULT_SUCCESS;
try {
ScopedContext Active(hDevice);
UR_CHECK_ERROR(cuMemAllocPitch((CUdeviceptr *)ppMem, pResultPitch,
widthInBytes, height, elementSizeBytes));
UR_CHECK_ERROR(
cuMemAllocPitch((CUdeviceptr *)ppMem, pResultPitch, widthInBytes,
height, static_cast<unsigned int>(elementSizeBytes)));
} catch (ur_result_t error) {
Result = error;
} catch (...) {
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/cuda/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(

int MaxNumActiveGroupsPerCU{0};
UR_CHECK_ERROR(cuOccupancyMaxActiveBlocksPerMultiprocessor(
&MaxNumActiveGroupsPerCU, hKernel->get(), localWorkSize,
dynamicSharedMemorySize));
&MaxNumActiveGroupsPerCU, hKernel->get(),
static_cast<int>(localWorkSize), dynamicSharedMemorySize));
detail::ur::assertion(MaxNumActiveGroupsPerCU >= 0);
// Handle the case where we can't have all SMs active with at least 1 group
// per SM. In that case, the device is still able to run 1 work-group, hence
Expand Down
8 changes: 4 additions & 4 deletions source/adapters/cuda/kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ struct ur_kernel_handle_t_ {
}
ParamSizes[Index] = Size;
// calculate the insertion point on the array
size_t InsertPos = std::accumulate(std::begin(ParamSizes),
std::begin(ParamSizes) + Index, 0);
size_t InsertPos = std::accumulate(
std::begin(ParamSizes), std::begin(ParamSizes) + Index, size_t{0});
// Update the stored value for the argument
std::memcpy(&Storage[InsertPos], Arg, Size);
Indices[Index] = &Storage[InsertPos];
Expand Down Expand Up @@ -152,8 +152,8 @@ struct ur_kernel_handle_t_ {
const args_index_t &getIndices() const noexcept { return Indices; }

uint32_t getLocalSize() const {
return std::accumulate(std::begin(OffsetPerIndex),
std::end(OffsetPerIndex), 0);
return static_cast<uint32_t>(std::accumulate(
std::begin(OffsetPerIndex), std::end(OffsetPerIndex), size_t{0}));
}
} Args;

Expand Down
4 changes: 2 additions & 2 deletions source/adapters/cuda/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ ur_result_t ur_program_handle_t_::buildProgram(const char *BuildOptions) {
}

UR_CHECK_ERROR(cuModuleLoadDataEx(&Module, static_cast<const void *>(Binary),
Options.size(), Options.data(),
OptionVals.data()));
static_cast<unsigned int>(Options.size()),
Options.data(), OptionVals.data()));

BuildStatus = UR_PROGRAM_BUILD_STATUS_SUCCESS;

Expand Down
2 changes: 1 addition & 1 deletion source/adapters/cuda/usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ umf_result_t USMMemoryProvider::initialize(ur_context_handle_t Ctx,

enum umf_result_t USMMemoryProvider::alloc(size_t Size, size_t Align,
void **Ptr) {
auto Res = allocateImpl(Ptr, Size, Align);
auto Res = allocateImpl(Ptr, Size, static_cast<uint32_t>(Align));
if (Res != UR_RESULT_SUCCESS) {
getLastStatusRef() = Res;
return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC;
Expand Down
18 changes: 10 additions & 8 deletions source/adapters/level_zero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,18 +89,19 @@ if(UR_BUILD_ADAPTER_L0)
endif()

# TODO: fix level_zero adapter conversion warnings
# C4267: The compiler detected a conversion from size_t to a smaller type.
target_compile_options(ur_adapter_level_zero PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244>
$<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244 /wd4267>
)

set_target_properties(ur_adapter_level_zero PROPERTIES
VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}"
SOVERSION "${PROJECT_VERSION_MAJOR}"
)

if (WIN32)
# 0x800: Search for the DLL only in the System32 folder
target_link_options(ur_adapter_level_zero PRIVATE /DEPENDENTLOADFLAG:0x800)
if(CMAKE_CXX_COMPILER_LINKER_ID MATCHES MSVC)
# 0x800: Search for the DLL only in the System32 folder
target_link_options(ur_adapter_level_zero PRIVATE LINKER:/DEPENDENTLOADFLAG:0x800)
endif()

target_link_libraries(ur_adapter_level_zero PRIVATE
Expand Down Expand Up @@ -182,18 +183,19 @@ if(UR_BUILD_ADAPTER_L0_V2)
target_compile_definitions(ur_adapter_level_zero_v2 PUBLIC UR_ADAPTER_LEVEL_ZERO_V2)

# TODO: fix level_zero adapter conversion warnings
# C4267: The compiler detected a conversion from size_t to a smaller type.
target_compile_options(ur_adapter_level_zero_v2 PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244>
$<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244 /wd4100 /wd4267>
)

set_target_properties(ur_adapter_level_zero_v2 PROPERTIES
VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}"
SOVERSION "${PROJECT_VERSION_MAJOR}"
)

if (WIN32)
# 0x800: Search for the DLL only in the System32 folder
target_link_options(ur_adapter_level_zero_v2 PUBLIC /DEPENDENTLOADFLAG:0x800)
if(CMAKE_CXX_COMPILER_LINKER_ID MATCHES MSVC)
# 0x800: Search for the DLL only in the System32 folder
target_link_options(ur_adapter_level_zero_v2 PUBLIC LINKER:/DEPENDENTLOADFLAG:0x800)
endif()

target_link_libraries(ur_adapter_level_zero_v2 PRIVATE
Expand Down
Loading

0 comments on commit da462e9

Please sign in to comment.