Skip to content

Commit

Permalink
[src] auto format cpp files
Browse files Browse the repository at this point in the history
  • Loading branch information
krrishnarraj committed Oct 20, 2019
1 parent d627418 commit e0cf442
Show file tree
Hide file tree
Showing 15 changed files with 267 additions and 221 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
build/
.vscode/
82 changes: 43 additions & 39 deletions src/clpeak.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,34 @@
#define MSTRINGIFY(...) #__VA_ARGS__

static const char *stringifiedKernels =
#include "global_bandwidth_kernels.cl"
#include "compute_sp_kernels.cl"
#include "compute_hp_kernels.cl"
#include "compute_dp_kernels.cl"
#include "compute_integer_kernels.cl"
#include "global_bandwidth_kernels.cl"
#include "compute_sp_kernels.cl"
#include "compute_hp_kernels.cl"
#include "compute_dp_kernels.cl"
#include "compute_integer_kernels.cl"
;

#ifdef USE_STUB_OPENCL
// Prototype
extern "C" {
void stubOpenclReset();
extern "C"
{
void stubOpenclReset();
}
#endif


clPeak::clPeak(): forcePlatform(false), forceDevice(false), useEventTimer(false),
isGlobalBW(true), isComputeSP(true), isComputeDP(true), isComputeInt(true),
isTransferBW(true), isKernelLatency(true),
specifiedPlatform(0), specifiedDevice(0)
clPeak::clPeak() : forcePlatform(false), forceDevice(false), useEventTimer(false),
isGlobalBW(true), isComputeSP(true), isComputeDP(true), isComputeInt(true),
isTransferBW(true), isKernelLatency(true),
specifiedPlatform(0), specifiedDevice(0)
{
}

clPeak::~clPeak()
{
if(log) delete log;
if (log)
{
delete log;
}
}

int clPeak::runAll()
Expand All @@ -43,9 +46,9 @@ int clPeak::runAll()

log->xmlOpenTag("clpeak");
log->xmlAppendAttribs("os", OS_NAME);
for(size_t p=0; p < platforms.size(); p++)
for (size_t p = 0; p < platforms.size(); p++)
{
if(forcePlatform && (p != specifiedPlatform))
if (forcePlatform && (p != specifiedPlatform))
continue;

std::string platformName = platforms[p].getInfo<CL_PLATFORM_NAME>();
Expand All @@ -56,30 +59,32 @@ int clPeak::runAll()
log->xmlAppendAttribs("name", platformName);

cl_context_properties cps[3] = {
CL_CONTEXT_PLATFORM,
(cl_context_properties)(platforms[p])(),
0
};
CL_CONTEXT_PLATFORM,
(cl_context_properties)(platforms[p])(),
0};

cl::Context ctx(CL_DEVICE_TYPE_ALL, cps);
vector<cl::Device> devices = ctx.getInfo<CL_CONTEXT_DEVICES>();
cl::Program::Sources source(1, make_pair(stringifiedKernels, (strlen(stringifiedKernels)+1)));
cl::Program::Sources source(1, make_pair(stringifiedKernels, (strlen(stringifiedKernels) + 1)));
cl::Program prog = cl::Program(ctx, source);

for(size_t d=0; d < devices.size(); d++)
for (size_t d = 0; d < devices.size(); d++)
{
if(forceDevice && (d != specifiedDevice))
if (forceDevice && (d != specifiedDevice))
continue;

device_info_t devInfo = getDeviceInfo(devices[d]);

log->print(TAB "Device: " + devInfo.deviceName + NEWLINE);
log->print(TAB TAB "Driver version : ");
log->print(devInfo.driverVersion); log->print(" (" OS_NAME ")" NEWLINE);
log->print(devInfo.driverVersion);
log->print(" (" OS_NAME ")" NEWLINE);
log->print(TAB TAB "Compute units : ");
log->print(devInfo.numCUs); log->print(NEWLINE);
log->print(devInfo.numCUs);
log->print(NEWLINE);
log->print(TAB TAB "Clock frequency : ");
log->print(devInfo.maxClockFreq); log->print(" MHz" NEWLINE);
log->print(devInfo.maxClockFreq);
log->print(" MHz" NEWLINE);
log->xmlOpenTag("device");
log->xmlAppendAttribs("name", devInfo.deviceName);
log->xmlAppendAttribs("driver_version", devInfo.driverVersion);
Expand All @@ -95,8 +100,7 @@ int clPeak::runAll()
catch (cl::Error &error)
{
UNUSED(error);
log->print(TAB TAB "Build Log: " + prog.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[d])
+ NEWLINE NEWLINE);
log->print(TAB TAB "Build Log: " + prog.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[d]) + NEWLINE NEWLINE);
continue;
}

Expand All @@ -111,34 +115,33 @@ int clPeak::runAll()
runKernelLatency(queue, prog, devInfo);

log->print(NEWLINE);
log->xmlCloseTag(); // device
log->xmlCloseTag(); // device
}
log->xmlCloseTag(); // platform
log->xmlCloseTag(); // platform
}
log->xmlCloseTag(); // clpeak
log->xmlCloseTag(); // clpeak
}
catch(cl::Error &error)
catch (cl::Error &error)
{
stringstream ss;
ss << error.what() << " (" << error.err() << ")" NEWLINE;

log->print(ss.str());

// skip error for no platform
if(strcmp(error.what(), "clGetPlatformIDs") == 0)
if (strcmp(error.what(), "clGetPlatformIDs") == 0)
{
log->print("no platforms found" NEWLINE);
log->print("no platforms found" NEWLINE);
}
else
{
return -1;
return -1;
}
}

return 0;
}


float clPeak::run_kernel(cl::CommandQueue &queue, cl::Kernel &kernel, cl::NDRange &globalSize, cl::NDRange &localSize, uint iters)
{
float timed = 0;
Expand All @@ -148,22 +151,23 @@ float clPeak::run_kernel(cl::CommandQueue &queue, cl::Kernel &kernel, cl::NDRang
queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
queue.finish();

if(useEventTimer)
if (useEventTimer)
{
for(uint i=0; i<iters; i++)
for (uint i = 0; i < iters; i++)
{
cl::Event timeEvent;

queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize, NULL, &timeEvent);
queue.finish();
timed += timeInUS(timeEvent);
}
} else // std timer
}
else // std timer
{
Timer timer;

timer.start();
for(uint i=0; i<iters; i++)
for (uint i = 0; i < iters; i++)
{
queue.enqueueNDRangeKernel(kernel, cl::NullRange, globalSize, localSize);
queue.flush();
Expand Down
28 changes: 13 additions & 15 deletions src/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ device_info_t getDeviceInfo(cl::Device &d)
// FIXME limit max-workgroup size for qualcomm platform to 128
// Kernel launch fails for workgroup size 256(CL_DEVICE_MAX_WORK_ITEM_SIZES)
string vendor = d.getInfo<CL_DEVICE_VENDOR>();
if( (vendor.find("QUALCOMM") != std::string::npos) ||
(vendor.find("qualcomm") != std::string::npos) )
if ((vendor.find("QUALCOMM") != std::string::npos) ||
(vendor.find("qualcomm") != std::string::npos))
{
devInfo.maxWGSize = MIN(devInfo.maxWGSize, 128);
}
Expand All @@ -40,22 +40,25 @@ device_info_t getDeviceInfo(cl::Device &d)

std::string extns = d.getInfo<CL_DEVICE_EXTENSIONS>();

if((extns.find("cl_khr_fp16") != std::string::npos))
if ((extns.find("cl_khr_fp16") != std::string::npos))
devInfo.halfSupported = true;

if((extns.find("cl_khr_fp64") != std::string::npos) || (extns.find("cl_amd_fp64") != std::string::npos))
if ((extns.find("cl_khr_fp64") != std::string::npos) || (extns.find("cl_amd_fp64") != std::string::npos))
devInfo.doubleSupported = true;

devInfo.deviceType = d.getInfo<CL_DEVICE_TYPE>();

if(devInfo.deviceType & CL_DEVICE_TYPE_CPU) {
if (devInfo.deviceType & CL_DEVICE_TYPE_CPU)
{
devInfo.gloalBWIters = 20;
devInfo.globalBWMaxSize = 1 << 27;
devInfo.computeWgsPerCU = 512;
devInfo.computeDPWgsPerCU = 256;
devInfo.computeIters = 10;
devInfo.transferBWMaxSize = 1 << 27;
} else { // GPU
}
else
{ // GPU
devInfo.gloalBWIters = 50;
devInfo.globalBWMaxSize = 1 << 29;
devInfo.computeWgsPerCU = 2048;
Expand All @@ -69,7 +72,6 @@ device_info_t getDeviceInfo(cl::Device &d)
return devInfo;
}


float timeInUS(cl::Event &timeEvent)
{
cl_ulong start = timeEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>() / 1000;
Expand All @@ -78,25 +80,22 @@ float timeInUS(cl::Event &timeEvent)
return (float)((int)end - (int)start);
}


void Timer::start()
{
tick = chrono::high_resolution_clock::now();
}


float Timer::stopAndTime()
{
tock = chrono::high_resolution_clock::now();
return (float)(chrono::duration_cast<chrono::microseconds>(tock - tick).count());
}


void populate(float *ptr, uint64_t N)
{
srand((unsigned int)time(NULL));

for(uint64_t i=0; i<N; i++)
for (uint64_t i = 0; i < N; i++)
{
//ptr[i] = (float)rand();
ptr[i] = (float)i;
Expand All @@ -107,25 +106,24 @@ void populate(double *ptr, uint64_t N)
{
srand((unsigned int)time(NULL));

for(uint64_t i=0; i<N; i++)
for (uint64_t i = 0; i < N; i++)
{
//ptr[i] = (double)rand();
ptr[i] = (double)i;
}
}


uint64_t roundToMultipleOf(uint64_t number, uint64_t base, uint64_t maxValue)
{
uint64_t n = (number > maxValue)? maxValue: number;
uint64_t n = (number > maxValue) ? maxValue : number;
return (n / base) * base;
}

void trimString(std::string &str)
{
size_t pos = str.find('\0');

if(pos != std::string::npos)
if (pos != std::string::npos)
{
str.erase(pos);
}
Expand Down
26 changes: 15 additions & 11 deletions src/compute_dp.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include <clpeak.h>


int clPeak::runComputeDP(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
{
float timed, gflops;
Expand All @@ -9,10 +8,10 @@ int clPeak::runComputeDP(cl::CommandQueue &queue, cl::Program &prog, device_info
cl_double A = 1.3f;
uint iters = devInfo.computeIters;

if(!isComputeDP)
if (!isComputeDP)
return 0;

if(!devInfo.doubleSupported)
if (!devInfo.doubleSupported)
{
log->print(NEWLINE TAB TAB "No double precision support! Skipped" NEWLINE);
return 0;
Expand Down Expand Up @@ -54,13 +53,14 @@ int clPeak::runComputeDP(cl::CommandQueue &queue, cl::Program &prog, device_info
// Vector width 1
log->print(TAB TAB TAB "double : ");

workPerWI = 4096; // Indicates flops executed per work-item
workPerWI = 4096; // Indicates flops executed per work-item

timed = run_kernel(queue, kernel_v1, globalSize, localSize, iters);

gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;

log->print(gflops); log->print(NEWLINE);
log->print(gflops);
log->print(NEWLINE);
log->xmlRecord("double", gflops);
///////////////////////////////////////////////////////////////////////////

Expand All @@ -73,7 +73,8 @@ int clPeak::runComputeDP(cl::CommandQueue &queue, cl::Program &prog, device_info

gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;

log->print(gflops); log->print(NEWLINE);
log->print(gflops);
log->print(NEWLINE);
log->xmlRecord("double2", gflops);
///////////////////////////////////////////////////////////////////////////

Expand All @@ -86,7 +87,8 @@ int clPeak::runComputeDP(cl::CommandQueue &queue, cl::Program &prog, device_info

gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;

log->print(gflops); log->print(NEWLINE);
log->print(gflops);
log->print(NEWLINE);
log->xmlRecord("double4", gflops);
///////////////////////////////////////////////////////////////////////////

Expand All @@ -98,7 +100,8 @@ int clPeak::runComputeDP(cl::CommandQueue &queue, cl::Program &prog, device_info

gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;

log->print(gflops); log->print(NEWLINE);
log->print(gflops);
log->print(NEWLINE);
log->xmlRecord("double8", gflops);
///////////////////////////////////////////////////////////////////////////

Expand All @@ -111,12 +114,13 @@ int clPeak::runComputeDP(cl::CommandQueue &queue, cl::Program &prog, device_info

gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;

log->print(gflops); log->print(NEWLINE);
log->print(gflops);
log->print(NEWLINE);
log->xmlRecord("double16", gflops);
///////////////////////////////////////////////////////////////////////////
log->xmlCloseTag(); // double_precision_compute
log->xmlCloseTag(); // double_precision_compute
}
catch(cl::Error &error)
catch (cl::Error &error)
{
stringstream ss;
ss << error.what() << " (" << error.err() << ")" NEWLINE
Expand Down
Loading

0 comments on commit e0cf442

Please sign in to comment.