Skip to content

Commit

Permalink
Merge pull request #38 from CNugteren/development
Browse files Browse the repository at this point in the history
Update to version 2.3.0
  • Loading branch information
CNugteren committed May 22, 2016
2 parents cba89a4 + ae12ebe commit b887e1e
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 24 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@

Version 2.3.0
- Added support for 'short' and 'cl_half' data-types as kernel buffer and scalar arguments
- Fixed a bug where failed results would still show up in the tuning results
- Made MSVC link the run-time libraries statically

Version 2.2.0
- Added two new simpler samples of using the tuner (vector-add and convolution)
- Updated the general documentation
Expand Down
46 changes: 26 additions & 20 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,16 @@
#
# ==================================================================================================

# CMake project
cmake_minimum_required(VERSION 2.8.10)

# Overrides for MSVC static runtime
set(CMAKE_USER_MAKE_RULES_OVERRIDE ${CMAKE_CURRENT_SOURCE_DIR}/cmake/c_flag_overrides.cmake)
set(CMAKE_USER_MAKE_RULES_OVERRIDE_CXX ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cxx_flag_overrides.cmake)

# CMake project details
project("cltune" CXX)
set(cltune_VERSION_MAJOR 2)
set(cltune_VERSION_MINOR 2)
set(cltune_VERSION_MINOR 3)
set(cltune_VERSION_PATCH 0)

# Options
Expand All @@ -54,40 +59,41 @@ set(CMAKE_INSTALL_RPATH_USE_LINK_PATH false) # Don't add the automatically deter
# ==================================================================================================

# Compiler-version check (requires at least CMake 2.8.10)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.7)
if(CMAKE_CXX_COMPILER_ID STREQUAL GNU)
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.7)
message(FATAL_ERROR "GCC version must be at least 4.7")
endif()
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.3)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL Clang)
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.3)
message(FATAL_ERROR "Clang version must be at least 3.3")
endif()
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
message(FATAL_ERROR "Clang version must be at least 5.0")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL AppleClang)
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
message(FATAL_ERROR "AppleClang version must be at least 5.0")
endif()
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 14.0)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 14.0)
message(FATAL_ERROR "ICC version must be at least 14.0")
endif()
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 18.0)
elseif(MSVC)
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 18.0)
message(FATAL_ERROR "MS Visual Studio version must be at least 18.0")
endif()
endif()

# C++ compiler settings
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
set(FLAGS "/Ox /wd4715 /wd4996")
else ()
if(MSVC)
set(FLAGS "/Ox")
set(FLAGS "${FLAGS} /wd4715 /wd4996")
else()
set(FLAGS "-O3 -std=c++11")
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
if(CMAKE_CXX_COMPILER_ID STREQUAL GNU)
set(FLAGS "${FLAGS} -Wall -Wno-comment")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8.4)
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8.4)
set(FLAGS "${FLAGS} -Wno-attributes")
endif()
elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
set(FLAGS "${FLAGS} -Wextra")
elseif(CMAKE_CXX_COMPILER_ID MATCHES Clang)
set(FLAGS "${FLAGS} -Wextra -Wno-c++98-compat -Wno-c++98-compat-pedantic")
endif()
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS}")
Expand Down
8 changes: 8 additions & 0 deletions cmake/c_flag_overrides.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Initial C compiler flags for MSVC that select the static run-time libraries (/MT and /MTd).
# The top-level CMakeLists.txt points CMAKE_USER_MAKE_RULES_OVERRIDE at this file.
# See http://www.cmake.org/Wiki/CMake_FAQ#How_can_I_build_my_MSVC_application_with_a_static_runtime.3F

# Nothing to override for other compilers
if(NOT MSVC)
  return()
endif()

# Optimised configurations: static release run-time (/MT)
set(CMAKE_C_FLAGS_RELEASE_INIT "/MT /O2 /Ob2 /D NDEBUG")
set(CMAKE_C_FLAGS_RELWITHDEBINFO_INIT "/MT /Zi /O2 /Ob1 /D NDEBUG")
set(CMAKE_C_FLAGS_MINSIZEREL_INIT "/MT /O1 /Ob1 /D NDEBUG")

# Debug configuration: static debug run-time (/MTd)
set(CMAKE_C_FLAGS_DEBUG_INIT "/D_DEBUG /MTd /Zi /Ob0 /Od /RTC1")
8 changes: 8 additions & 0 deletions cmake/cxx_flag_overrides.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Initial C++ compiler flags for MSVC that select the static run-time libraries (/MT and /MTd).
# The top-level CMakeLists.txt points CMAKE_USER_MAKE_RULES_OVERRIDE_CXX at this file.
# See http://www.cmake.org/Wiki/CMake_FAQ#How_can_I_build_my_MSVC_application_with_a_static_runtime.3F

# Nothing to override for other compilers
if(NOT MSVC)
  return()
endif()

# Optimised configurations: static release run-time (/MT)
set(CMAKE_CXX_FLAGS_RELEASE_INIT "/MT /O2 /Ob2 /D NDEBUG")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT "/MT /Zi /O2 /Ob1 /D NDEBUG")
set(CMAKE_CXX_FLAGS_MINSIZEREL_INIT "/MT /O1 /Ob1 /D NDEBUG")

# Debug configuration: static debug run-time (/MTd)
set(CMAKE_CXX_FLAGS_DEBUG_INIT "/D_DEBUG /MTd /Zi /Ob0 /Od /RTC1")
9 changes: 8 additions & 1 deletion include/internal/tuner_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@
namespace cltune {
// =================================================================================================

// Host data-type for half-precision floating-point (16-bit). The alias is chosen at compile time
// by the USE_OPENCL macro: OpenCL's cl_half when it evaluates to non-zero, a plain unsigned short
// otherwise.
// NOTE(review): both alternatives look like integer storage types, so a 'half' presumably carries
// the raw 16-bit pattern rather than a value the host can do arithmetic on -- confirm.
#if USE_OPENCL
using half = cl_half;
#else
using half = short unsigned int;
#endif

// Shorthands for complex data-types
using float2 = std::complex<float>; // cl_float2;
using double2 = std::complex<double>; // cl_double2;
Expand All @@ -62,7 +69,7 @@ using double2 = std::complex<double>; // cl_double2;
#endif

// Enumeration of currently supported data-types by this class
enum class MemType { kInt, kSizeT, kFloat, kDouble, kFloat2, kDouble2 };
enum class MemType { kShort, kInt, kSizeT, kHalf, kFloat, kDouble, kFloat2, kDouble2 };

// See comment at top of file for a description of the class
class TunerImpl {
Expand Down
22 changes: 20 additions & 2 deletions src/cltune.cc
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,10 @@ void Tuner::AddArgumentInput(const std::vector<T> &source) {
}

// Compiles the function for various data-types
template void PUBLIC_API Tuner::AddArgumentInput<short>(const std::vector<short>&);
template void PUBLIC_API Tuner::AddArgumentInput<int>(const std::vector<int>&);
template void PUBLIC_API Tuner::AddArgumentInput<size_t>(const std::vector<size_t>&);
template void PUBLIC_API Tuner::AddArgumentInput<half>(const std::vector<half>&);
template void PUBLIC_API Tuner::AddArgumentInput<float>(const std::vector<float>&);
template void PUBLIC_API Tuner::AddArgumentInput<double>(const std::vector<double>&);
template void PUBLIC_API Tuner::AddArgumentInput<float2>(const std::vector<float2>&);
Expand All @@ -188,8 +190,10 @@ void Tuner::AddArgumentOutput(const std::vector<T> &source) {
}

// Compiles the function for various data-types
template void PUBLIC_API Tuner::AddArgumentOutput<short>(const std::vector<short>&);
template void PUBLIC_API Tuner::AddArgumentOutput<int>(const std::vector<int>&);
template void PUBLIC_API Tuner::AddArgumentOutput<size_t>(const std::vector<size_t>&);
template void PUBLIC_API Tuner::AddArgumentOutput<half>(const std::vector<half>&);
template void PUBLIC_API Tuner::AddArgumentOutput<float>(const std::vector<float>&);
template void PUBLIC_API Tuner::AddArgumentOutput<double>(const std::vector<double>&);
template void PUBLIC_API Tuner::AddArgumentOutput<float2>(const std::vector<float2>&);
Expand All @@ -198,12 +202,18 @@ template void PUBLIC_API Tuner::AddArgumentOutput<double2>(const std::vector<dou
// Sets a scalar value as an argument to the kernel. Since a vector of scalars of any type doesn't
// exist, there is no general implementation. Instead, each data-type has its specialised version in
// which it stores to a specific vector.
// Each specialisation below appends an {argument index, value} pair to one of the per-type lists
// held by the implementation object and post-increments the shared argument counter, so the index
// reflects the order in which arguments were added.

// 'short' scalars share the list used for 'int' scalars.
// NOTE(review): this presumably means the value is later passed on as an 'int' rather than as a
// 16-bit 'short' -- confirm against how arguments_int_ is consumed.
template <> void PUBLIC_API Tuner::AddArgumentScalar<short>(const short argument) {
pimpl->arguments_int_.push_back({pimpl->argument_counter_++, argument});
}
// 'int' scalars
template <> void PUBLIC_API Tuner::AddArgumentScalar<int>(const int argument) {
pimpl->arguments_int_.push_back({pimpl->argument_counter_++, argument});
}
// 'size_t' scalars
template <> void PUBLIC_API Tuner::AddArgumentScalar<size_t>(const size_t argument) {
pimpl->arguments_size_t_.push_back({pimpl->argument_counter_++, argument});
}
// 'half' scalars share the list used for 'float' scalars.
// NOTE(review): if 'half' is an integer alias holding raw half-precision bits, storing it in a
// float list would convert the bit pattern numerically instead of decoding it -- confirm whether
// an explicit half-to-float conversion is needed here.
template <> void PUBLIC_API Tuner::AddArgumentScalar<half>(const half argument) {
pimpl->arguments_float_.push_back({pimpl->argument_counter_++, argument});
}
// 'float' scalars
template <> void PUBLIC_API Tuner::AddArgumentScalar<float>(const float argument) {
pimpl->arguments_float_.push_back({pimpl->argument_counter_++, argument});
}
Expand Down Expand Up @@ -352,10 +362,18 @@ void Tuner::PrintJSON(const std::string &filename,
fprintf(file, " \"device_compute_units\": \"%zu\",\n", pimpl->device().ComputeUnits());
fprintf(file, " \"results\": [\n");

// Filters failed configurations
auto results = std::vector<TunerImpl::TunerResult>();
for (const auto &tuning_result: pimpl->tuning_results_) {
if (tuning_result.status && tuning_result.time != std::numeric_limits<double>::max()) {
results.push_back(tuning_result);
}
}

// Loops over all the results
auto num_results = pimpl->tuning_results_.size();
auto num_results = results.size();
for (auto r=size_t{0}; r<num_results; ++r) {
auto result = pimpl->tuning_results_[r];
auto result = results[r];
fprintf(file, " {\n");
fprintf(file, " \"kernel\": \"%s\",\n", result.kernel_name.c_str());
fprintf(file, " \"time\": %.3lf,\n", result.time);
Expand Down
11 changes: 10 additions & 1 deletion src/tuner_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -206,15 +206,16 @@ void TunerImpl::Tune() {

// Stores the parameters and the timing-result
tuning_result.configuration = permutation;
tuning_results_.push_back(tuning_result);
if (tuning_result.time == std::numeric_limits<float>::max()) {
tuning_result.time = 0.0;
PrintResult(stdout, tuning_result, kMessageFailure);
tuning_result.time = std::numeric_limits<float>::max();
tuning_result.status = false;
}
else if (!tuning_result.status) {
PrintResult(stdout, tuning_result, kMessageWarning);
}
tuning_results_.push_back(tuning_result);
}

// Prints a log of the searching process. This is disabled per default, but can be enabled
Expand Down Expand Up @@ -265,8 +266,10 @@ TunerImpl::TunerResult TunerImpl::RunKernel(const std::string &source, const Ker
// Sets the output buffer(s) to zero
for (auto &output: arguments_output_) {
switch (output.type) {
case MemType::kShort: ResetMemArgument<short>(output); break;
case MemType::kInt: ResetMemArgument<int>(output); break;
case MemType::kSizeT: ResetMemArgument<size_t>(output); break;
case MemType::kHalf: ResetMemArgument<half>(output); break;
case MemType::kFloat: ResetMemArgument<float>(output); break;
case MemType::kDouble: ResetMemArgument<double>(output); break;
case MemType::kFloat2: ResetMemArgument<float2>(output); break;
Expand Down Expand Up @@ -357,8 +360,10 @@ void TunerImpl::StoreReferenceOutput() {
reference_outputs_.clear();
for (auto &output_buffer: arguments_output_) {
switch (output_buffer.type) {
case MemType::kShort: DownloadReference<short>(output_buffer); break;
case MemType::kInt: DownloadReference<int>(output_buffer); break;
case MemType::kSizeT: DownloadReference<size_t>(output_buffer); break;
case MemType::kHalf: DownloadReference<half>(output_buffer); break;
case MemType::kFloat: DownloadReference<float>(output_buffer); break;
case MemType::kDouble: DownloadReference<double>(output_buffer); break;
case MemType::kFloat2: DownloadReference<float2>(output_buffer); break;
Expand All @@ -385,8 +390,10 @@ bool TunerImpl::VerifyOutput() {
auto i = size_t{0};
for (auto &output_buffer: arguments_output_) {
switch (output_buffer.type) {
case MemType::kShort: status &= DownloadAndCompare<short>(output_buffer, i); break;
case MemType::kInt: status &= DownloadAndCompare<int>(output_buffer, i); break;
case MemType::kSizeT: status &= DownloadAndCompare<size_t>(output_buffer, i); break;
case MemType::kHalf: status &= DownloadAndCompare<half>(output_buffer, i); break;
case MemType::kFloat: status &= DownloadAndCompare<float>(output_buffer, i); break;
case MemType::kDouble: status &= DownloadAndCompare<double>(output_buffer, i); break;
case MemType::kFloat2: status &= DownloadAndCompare<float2>(output_buffer, i); break;
Expand Down Expand Up @@ -612,8 +619,10 @@ void TunerImpl::PrintHeader(const std::string &header_name) const {
// =================================================================================================

// Get the MemType based on a template argument. Each specialisation maps one host data-type to the
// MemType enumerator of the same name.
// NOTE(review): the 'short' and 'half' specialisations can only coexist if 'half' is a type
// distinct from 'short' (e.g. an unsigned 16-bit alias) -- confirm for every supported platform.
template <> MemType TunerImpl::GetType<short>() { return MemType::kShort; }
template <> MemType TunerImpl::GetType<int>() { return MemType::kInt; }
template <> MemType TunerImpl::GetType<size_t>() { return MemType::kSizeT; }
template <> MemType TunerImpl::GetType<half>() { return MemType::kHalf; }
template <> MemType TunerImpl::GetType<float>() { return MemType::kFloat; }
template <> MemType TunerImpl::GetType<double>() { return MemType::kDouble; }
template <> MemType TunerImpl::GetType<float2>() { return MemType::kFloat2; }
Expand Down

0 comments on commit b887e1e

Please sign in to comment.