Skip to content

Commit

Permalink
Merge branch 'development'
Browse files Browse the repository at this point in the history
  • Loading branch information
CNugteren committed Jun 26, 2017
2 parents 35de111 + 3c577cc commit 6b7c50b
Show file tree
Hide file tree
Showing 9 changed files with 89 additions and 32 deletions.
19 changes: 11 additions & 8 deletions .appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
environment:
global:
CLTUNE_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\cltune"
CLTUNE_ROOT: "C:\\cltune\\build"
OPENCL_REGISTRY: "https://www.khronos.org/registry/cl"
OPENCL_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\opencl"
OPENCL_ROOT: "C:\\dependencies\\opencl"

platform:
- x64
Expand All @@ -21,25 +21,28 @@ install:
- ps: pushd $env:OPENCL_ROOT
- ps: $opencl_registry = $env:OPENCL_REGISTRY
# This downloads the source to the Khronos ICD library
- git clone --depth 1 https://github.com/KhronosGroup/OpenCL-ICD-Loader.git
- git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader.git
- ps: pushd OpenCL-ICD-Loader
- git checkout cb4acb9 # older version (pre 2.2 support)
- ps: popd
- ps: mv ./OpenCL-ICD-Loader/* .
# This downloads all the opencl header files
# The cmake build files expect a directory called inc
- ps: mkdir inc/CL
- git clone --depth 1 https://github.com/KhronosGroup/OpenCL-Headers.git inc/CL
- ps: wget $opencl_registry/api/2.1/cl.hpp -OutFile inc/CL/cl.hpp
# Switch to OpenCL 2.1 headers
- ps: pushd inc/CL
- git fetch origin opencl21:opencl21
- git checkout opencl21
- ps: popd
# - ps: dir; if( $lastexitcode -eq 0 ){ dir include/CL } else { Write-Output boom }
# Create the static import lib in a directory called lib, so findopencl() will find it
- ps: mkdir lib
- ps: pushd lib
- cmake -G "NMake Makefiles" ..
- nmake
- ps: popd
# Switch to OpenCL 1.2 headers
- ps: pushd inc/CL
- git fetch origin opencl12:opencl12
- git checkout opencl12
- ps: popd
# Rename the inc directory to include, so FindOpencl() will find it
- ps: ren inc include
- ps: popd
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@

Version 2.7.0
- CLTune now automatically ensures global size is a multiple of the local workgroup size
- Added GetBestResult() to the tuner's API to retrieve the best parameters programmatically
- Changed std::initializer_list in the AddParameters API to std::vector
- Fixed a bug in the simulated annealing search method

Version 2.6.0
- Changed timing measurements to now also include the (varying) kernel launch overhead
- It is now possible to set OpenCL compiler options through the env variable CLTUNE_BUILD_OPTIONS
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ set(CMAKE_USER_MAKE_RULES_OVERRIDE_CXX ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cxx_fla
# CMake project details
project("cltune" CXX)
set(cltune_VERSION_MAJOR 2)
set(cltune_VERSION_MINOR 6)
set(cltune_VERSION_MINOR 7)
set(cltune_VERSION_PATCH 0)

# Options and their default values
Expand Down
5 changes: 4 additions & 1 deletion doc/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Adds a new kernel to the list of tuning-kernels and returns a unique ID (to be u
* `size_t AddKernelFromString(const std::string &source, const std::string &kernel_name, const IntRange &global, const IntRange &local)`:
As above, but now the kernel is loaded from a string instead of from a file.

* `void AddParameter(const size_t id, const std::string &parameter_name, const std::initializer_list<size_t> &values)`:
* `void AddParameter(const size_t id, const std::string &parameter_name, const std::vector<size_t> &values)`:
Adds a new tuning parameter for the kernel with the given `id`. The parameter is named `parameter_name` and takes a list of tunable integer values.

* `void MulGlobalSize(const size_t id, const StringRange range)`:
Expand Down Expand Up @@ -89,6 +89,9 @@ Call this method *after* calling the `Tune()` method. Trains a machine learning
Output
-------------

* `std::unordered_map<std::string, size_t> GetBestResult()`:
Retrieves the parameters of the best tuning result and returns them to the caller as a map of strings (parameter names) to integers (parameter values).

* `void OutputSearchLog(const std::string &filename)`:
Outputs the search process to the file `filename`.

Expand Down
6 changes: 5 additions & 1 deletion include/cltune.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <memory> // std::unique_ptr
#include <functional> // std::function
#include <utility> // std::pair
#include <unordered_map> // std::unordered_map

// Exports library functions under Windows when building a DLL. See also:
// https://msdn.microsoft.com/en-us/library/a90k134d.aspx
Expand Down Expand Up @@ -93,7 +94,7 @@ class Tuner {
// Adds a new tuning parameter for a kernel with a specific ID. The parameter has a name and a
// list of values.
void PUBLIC_API AddParameter(const size_t id, const std::string &parameter_name,
const std::initializer_list<size_t> &values);
const std::vector<size_t> &values);

// As above, but now adds a single valued parameter to the reference
void PUBLIC_API AddParameterReference(const std::string &parameter_name, const size_t value);
Expand Down Expand Up @@ -144,6 +145,9 @@ class Tuner {
void PUBLIC_API ModelPrediction(const Model model_type, const float validation_fraction,
const size_t test_top_x_configurations);

// Retrieves the parameters of the best tuning result
std::unordered_map<std::string, size_t> GetBestResult() const;

// Prints the results of the tuning either to screen (stdout) or to a specific output-file.
// Returns the execution time in milliseconds.
double PUBLIC_API PrintToScreen() const;
Expand Down
11 changes: 11 additions & 0 deletions include/internal/tuner_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,9 @@ class TunerImpl {
// Prints results of a particular kernel run
void PrintResult(FILE* fp, const TunerResult &result, const std::string &message) const;

// Retrieves the best tuning result
TunerResult GetBestResult() const;

// Loads a file from disk into a string
std::string LoadFile(const std::string &filename);

Expand All @@ -146,6 +149,14 @@ class TunerImpl {
// argument. Supports all enumerations of MemType.
template <typename T> MemType GetType();

// Rounding helpers. Both require a non-zero divisor 'y'.

// Integer division of 'x' by 'y', rounded up. Written as quotient plus a remainder check rather
// than '1 + (x - 1) / y', because the latter wraps around for x == 0 (unsigned underflow) and
// would return a huge value instead of 0.
size_t CeilDiv(const size_t x, const size_t y) {
  return (x / y) + ((x % y != 0) ? size_t{1} : size_t{0});
}

// Rounds 'x' up to the nearest multiple of 'y'. A value of 0 stays 0.
size_t Ceil(const size_t x, const size_t y) {
  return CeilDiv(x, y) * y;
}

// Accessors to device data-types
const Device device() const { return device_; }
const Context context() const { return context_; }
Expand Down
36 changes: 19 additions & 17 deletions src/cltune.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ void Tuner::SetReferenceFromString(const std::string &source, const std::string

// Adds parameters for a kernel to tune. Also checks whether this parameter already exists.
void Tuner::AddParameter(const size_t id, const std::string &parameter_name,
const std::initializer_list<size_t> &values) {
const std::vector<size_t> &values) {
if (id >= pimpl->kernels_.size()) { throw std::runtime_error("Invalid kernel ID"); }
if (pimpl->kernels_[id].ParameterExists(parameter_name)) {
throw std::runtime_error("Parameter already exists");
Expand Down Expand Up @@ -283,19 +283,27 @@ void Tuner::ModelPrediction(const Model model_type, const float validation_fract

// =================================================================================================


// Returns the parameter settings of the best tuning result as a name-to-value map
std::unordered_map<std::string, size_t> Tuner::GetBestResult() const {
  const auto winner = pimpl->GetBestResult();

  // Flattens the configuration (a list of name/value settings) into an unordered map of strings
  // and integers. Should a name occur more than once, its last value is the one that is kept.
  std::unordered_map<std::string, size_t> result;
  for (const auto &setting : winner.configuration) {
    result[setting.name] = setting.value;
  }
  return result;
}

// Iterates over all tuning results and prints each parameter configuration and the corresponding
// timing-results. Printing is to stdout.
double Tuner::PrintToScreen() const {

// Finds the best result
auto best_result = pimpl->tuning_results_[0];
auto best_time = std::numeric_limits<double>::max();
for (auto &tuning_result: pimpl->tuning_results_) {
if (tuning_result.status && best_time >= tuning_result.time) {
best_result = tuning_result;
best_time = tuning_result.time;
}
}
const auto best_result = pimpl->GetBestResult();
const auto best_time = best_result.time;

// Aborts if there was no best time found
if (best_time == std::numeric_limits<double>::max()) {
Expand All @@ -321,14 +329,8 @@ double Tuner::PrintToScreen() const {
void Tuner::PrintFormatted() const {

// Finds the best result
auto best_result = pimpl->tuning_results_[0];
auto best_time = std::numeric_limits<double>::max();
for (auto &tuning_result: pimpl->tuning_results_) {
if (tuning_result.status && best_time >= tuning_result.time) {
best_result = tuning_result;
best_time = tuning_result.time;
}
}
const auto best_result = pimpl->GetBestResult();
const auto best_time = best_result.time;

// Prints the best result in C++ database format
auto count = size_t{0};
Expand Down
16 changes: 12 additions & 4 deletions src/searchers/annealing.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,11 @@ KernelInfo::Configuration Annealing::GetConfiguration() {
void Annealing::CalculateNextIndex() {

// Computes the new temperature
auto progress = num_visited_states_ / static_cast<double>(NumConfigurations());
const auto num_configurations = static_cast<double>(NumConfigurations());
if (num_configurations == 0.0) {
throw std::runtime_error("Running annealing with 0 configurations, aborting");
}
auto progress = num_visited_states_ / num_configurations;
auto temperature = max_temperature_ * (1.0 - progress);

// Determines whether to continue with the neighbour or with the current ID
Expand All @@ -89,7 +93,8 @@ void Annealing::CalculateNextIndex() {

// Computes the new neighbour state
auto neighbours = GetNeighboursOf(current_state_);
neighbour_state_ = neighbours[static_cast<size_t>(int_distribution_(generator_))%neighbours.size()];
const auto random_integer = static_cast<size_t>(std::abs(int_distribution_(generator_)));
neighbour_state_ = neighbours[random_integer % neighbours.size()];

// Checks whether this neighbour was already visited. If so, calculate a new neighbour instead.
// This continues up to a maximum number, because all neighbours might already be visited. In
Expand All @@ -109,7 +114,7 @@ void Annealing::CalculateNextIndex() {

// The number of configurations to explore: the total number of configurations scaled by
// 'fraction_' (truncated towards zero), but never less than one.
size_t Annealing::NumConfigurations() {
  // The size is converted to double explicitly so that the multiplication does not rely on an
  // implicit integer-to-floating-point conversion.
  const auto scaled = static_cast<double>(configurations_.size()) * fraction_;
  return std::max(size_t{1}, static_cast<size_t>(scaled));
}

// =================================================================================================
Expand Down Expand Up @@ -142,11 +147,14 @@ std::vector<size_t> Annealing::GetNeighboursOf(const size_t reference_id) const
}

// Consider this configuration a neighbour if there is at most a certain amount of differences
if (differences == kMaxDifferences) {
if (differences <= kMaxDifferences) {
neighbours.push_back(other_id);
}
++other_id;
}
if (neighbours.size() == 0) {
throw std::runtime_error("Running annealing and found no neighbours, aborting");
}
return neighbours;
}

Expand Down
20 changes: 20 additions & 0 deletions src/tuner_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,11 @@ TunerImpl::TunerResult TunerImpl::RunKernel(const std::string &source, const Ker
auto global = kernel.global();
auto local = kernel.local();

// Makes sure that the global size is a multiple of the local
for (auto i=size_t{0}; i<global.size(); ++i) {
global[i] = Ceil(global[i], local[i]);
}

// Verifies the local memory usage of the kernel
auto local_mem_usage = tune_kernel.LocalMemUsage(device_);
if (!device_.IsLocalMemoryValid(local_mem_usage)) {
Expand Down Expand Up @@ -646,6 +651,21 @@ void TunerImpl::PrintResult(FILE* fp, const TunerResult &result, const std::stri

// =================================================================================================

// Returns a copy of the best tuning result: among the results whose status is set, the one with
// the lowest time (on equal times the later entry wins). If no result has its status set, the
// first entry is returned.
// NOTE(review): element 0 is accessed unconditionally, so this presumably requires a non-empty
// 'tuning_results_' - confirm that callers only invoke this after tuning has produced results.
TunerImpl::TunerResult TunerImpl::GetBestResult() const {
  const auto *winner = &tuning_results_[0];
  auto lowest_time = std::numeric_limits<double>::max();
  for (const auto &candidate : tuning_results_) {
    if (!candidate.status) { continue; }
    if (lowest_time >= candidate.time) {
      winner = &candidate;
      lowest_time = candidate.time;
    }
  }
  return *winner;
}

// =================================================================================================

// Loads a file into a stringstream and returns the result as a string
std::string TunerImpl::LoadFile(const std::string &filename) {
std::ifstream file(filename);
Expand Down

0 comments on commit 6b7c50b

Please sign in to comment.