Skip to content

Commit

Permalink
samples/hip-cuda: convert tests to also report the best iteration time
Browse files Browse the repository at this point in the history
The original code reports the average iteration time.
  • Loading branch information
franz committed May 10, 2023
1 parent 7436951 commit fc921ca
Show file tree
Hide file tree
Showing 6 changed files with 149 additions and 119 deletions.
53 changes: 24 additions & 29 deletions samples/hip-cuda/BinomialOption/BinomialOption.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class BinomialOption
{
double setupTime; /**< Time taken to setup resources and building kernel */
double kernelTime; /**< Time taken to run kernel and read result back */
float bestIterTimeMS;
int numSamples; /**< No. of samples*/
unsigned int samplesPerVectorWidth; /**< No. of samples per vector width */
unsigned int numSteps; /**< No. of time steps*/
Expand All @@ -75,18 +76,14 @@ class BinomialOption
* Initialize member variables
*/
BinomialOption()
: setupTime(0),
kernelTime(0),
randArray(NULL),
output(NULL),
refOutput(NULL),
iterations(1)
{
numSamples = 256;
numSteps = 254;
sampleArgs = new HIPCommandArgs() ;
sampleTimer = new SDKTimer();
sampleArgs->sampleVerStr = SAMPLE_VERSION;
: setupTime(0), kernelTime(0),
bestIterTimeMS(std::numeric_limits<float>::max()),
randArray(NULL), output(NULL), refOutput(NULL), iterations(1) {
numSamples = 256;
numSteps = 254;
sampleArgs = new HIPCommandArgs();
sampleTimer = new SDKTimer();
sampleArgs->sampleVerStr = SAMPLE_VERSION;
}

inline long long get_time()
Expand Down Expand Up @@ -347,7 +344,7 @@ BinomialOption::runKernels()

hipEventCreate(&start);
hipEventCreate(&stop);
float eventMs = 1.0f;
float eventMs = 1000000.0f;

unsigned int localThreads = {numSteps + 1};

Expand All @@ -360,14 +357,15 @@ BinomialOption::runKernels()
0, 0,
numSteps ,(float4*)randBuffer ,(float4*)outBuffer);

hipMemcpy(output, dout, samplesPerVectorWidth * sizeof(float4),
hipMemcpyDeviceToHost);

hipEventRecord(stop, NULL);
hipEventSynchronize(stop);

hipEventElapsedTime(&eventMs, start, stop);

// printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs);

hipMemcpy(output, dout, samplesPerVectorWidth * sizeof(float4), hipMemcpyDeviceToHost);
if (eventMs < bestIterTimeMS)
bestIterTimeMS = eventMs;

return SDK_SUCCESS;
}
Expand Down Expand Up @@ -539,8 +537,8 @@ int BinomialOption::run()

if(!sampleArgs->quiet)
{
printArray<float>("input", randArray, numSamples, 1);
printArray<float>("Output", output, numSamples, 1);
printArray<float>("input", randArray, numSamples, 1);
printArray<float>("Output", output, numSamples, 1);
}

return SDK_SUCCESS;
Expand Down Expand Up @@ -589,23 +587,20 @@ void BinomialOption::printStats()
{
if(sampleArgs->timing)
{
std::string strArray[4] =
{
"Option Samples",
"Time(sec)",
"Transfer+kernel(sec)" ,
"Options/sec"
};
std::string strArray[5] = {"Option Samples", "Time(sec)",
"Transfer+kernel(sec)",
"Best Iter. time (msec)", "Options/sec"};

sampleTimer->totalTime = setupTime + kernelTime;

std::string stats[4];
std::string stats[5];
stats[0] = toString(numSamples, std::dec);
stats[1] = toString(sampleTimer->totalTime, std::dec);
stats[2] = toString(kernelTime, std::dec);
stats[3] = toString(numSamples / sampleTimer->totalTime, std::dec);
stats[3] = toString(bestIterTimeMS, std::dec);
stats[4] = toString(numSamples / sampleTimer->totalTime, std::dec);

printStatistics(strArray, stats, 4);
printStatistics(strArray, stats, 5);
}
}

Expand Down
32 changes: 20 additions & 12 deletions samples/hip-cuda/BitonicSort/BitonicSort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class BitonicSort
double totalKernelTime; /**< Time for kernel execution */
double totalProgramTime; /**< Time for program execution */
double referenceKernelTime; /**< Time for reference implementation */
float bestIterTimeMS;
unsigned int sortFlag; /**< Flag to indicate sorting order */
std::string sortOrder; /**< Argument to indicate sorting order */
unsigned int *input; /**< Input array */
Expand All @@ -69,6 +70,7 @@ class BitonicSort
length = 32768;
setupTime = 0;
totalKernelTime = 0;
bestIterTimeMS = std::numeric_limits<float>::max();
iterations = 1;
sampleArgs = new HIPCommandArgs() ;
sampleTimer = new SDKTimer();
Expand Down Expand Up @@ -270,7 +272,7 @@ BitonicSort::runKernels(void)

hipEventCreate(&start);
hipEventCreate(&stop);
float eventMs = 1.0f;
float eventMs = 1000000.0f;

unsigned int numStages = 0;
unsigned int temp;
Expand Down Expand Up @@ -331,28 +333,31 @@ BitonicSort::runKernels(void)
sortFlag = 0;
}

hipEventRecord(start, NULL);
for(stage = 0; stage < numStages; ++stage)
{

for(passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
{
hipEventRecord(start, NULL);

hipLaunchKernelGGL(bitonicSort,
dim3(globalThreads/localThreads),
dim3(localThreads),
0, 0,
inputBuffer ,stage, passOfStage ,sortFlag);

hipEventRecord(stop, NULL);
hipEventSynchronize(stop);

hipEventElapsedTime(&eventMs, start, stop);

// printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs);
}
}
hipMemcpy(input, din,length * sizeof(unsigned int), hipMemcpyDeviceToHost);
hipMemcpy(input, din, length * sizeof(unsigned int), hipMemcpyDeviceToHost);

hipEventRecord(stop, NULL);
hipEventSynchronize(stop);

hipEventElapsedTime(&eventMs, start, stop);
if (eventMs < bestIterTimeMS)
bestIterTimeMS = eventMs;

return SDK_SUCCESS;
}

Expand Down Expand Up @@ -586,17 +591,20 @@ void BitonicSort::printStats()
{
if(sampleArgs->timing)
{
std::string strArray[4] = {"Elements", "Setup Time (sec)", "Avg. Kernel Time (sec)", "Elements/sec"};
std::string stats[4];
std::string strArray[5] = {"Elements", "Setup Time (sec)",
"Avg. Kernel Time (sec)",
"Best Iter. Time (msec)", "Elements/sec"};
std::string stats[5];

sampleTimer->totalTime = ( totalKernelTime/ iterations );

stats[0] = toString(length, std::dec);
stats[1] = toString(setupTime, std::dec);
stats[2] = toString(sampleTimer->totalTime, std::dec);
stats[3] = toString(( length/sampleTimer->totalTime ), std::dec);
stats[3] = toString(bestIterTimeMS, std::dec);
stats[4] = toString((length / sampleTimer->totalTime), std::dec);

printStatistics(strArray, stats, 4);
printStatistics(strArray, stats, 5);
}
}
int BitonicSort::cleanup()
Expand Down
25 changes: 19 additions & 6 deletions samples/hip-cuda/DCT/DCT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class DCT
double totalKernelTime; /**< Time for kernel execution */
double totalProgramTime; /**< Time for program execution */
double referenceKernelTime; /**< Time for reference implementation */
float bestIterTimeMS;
int width; /**< Width of the input array */
int height; /**< height of the input array */
float *input; /**< Input array */
Expand Down Expand Up @@ -112,6 +113,7 @@ class DCT
inverse = 0;
setupTime = 0;
totalKernelTime = 0;
bestIterTimeMS = std::numeric_limits<float>::max();
iterations = 1;
sampleArgs = new HIPCommandArgs() ;
sampleTimer = new SDKTimer();
Expand Down Expand Up @@ -385,7 +387,7 @@ int
DCT::runKernels(void)
{
hipEvent_t start, stop;
float eventMs = 1.0f;
float eventMs = 1000000.0f;

hipEventCreate(&start);
hipEventCreate(&stop);
Expand All @@ -399,15 +401,18 @@ DCT::runKernels(void)
0, 0,
outputBuffer ,inputBuffer ,dctBuffer, dct_transBuffer, width, blockWidth, inverse );

hipMemcpy(output, dout, sizeof(float) * width * height,
hipMemcpyDeviceToHost);

hipEventRecord(stop, NULL);
hipEventSynchronize(stop);


hipEventElapsedTime(&eventMs, start, stop);
if (eventMs < bestIterTimeMS)
bestIterTimeMS = eventMs;

// printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs);

hipMemcpy(output, dout,sizeof(float) * width * height, hipMemcpyDeviceToHost);

return SDK_SUCCESS;
}
Expand Down Expand Up @@ -656,19 +661,27 @@ void DCT::printStats()
{
if(sampleArgs->timing)
{
std::string strArray[4] = {"Width", "Height", "Time(sec)", "[Transfer+Kernel]Time(sec)"};
std::string stats[4];
std::string strArray[5] = {
"Width",
"Height",
"Time(sec)",
"[Transfer+Kernel]Time(sec)",
"Best Iter. time (msec)",
};
std::string stats[5];

sampleTimer->totalTime = setupTime + totalKernelTime;

stats[0] = toString(width , std::dec);
stats[1] = toString(height , std::dec);
stats[2] = toString(sampleTimer->totalTime, std::dec);
stats[3] = toString(totalKernelTime, std::dec);
stats[4] = toString(bestIterTimeMS, std::dec);

printStatistics(strArray, stats, 4);
printStatistics(strArray, stats, 5);
}
}

int DCT::cleanup()
{

Expand Down
47 changes: 27 additions & 20 deletions samples/hip-cuda/FastWalshTransform/FastWalshTransform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class FastWalshTransform
double setupTime; /**< Time for setting up OpenCL */
double totalKernelTime; /**< Time for kernel execution */
double totalProgramTime; /**< Time for program execution */
float bestIterTimeMS;
double referenceKernelTime; /**< Time for reference implementation */
int length; /**< Length of the input array */
float *input; /**< Input array */
Expand All @@ -70,6 +71,7 @@ class FastWalshTransform
verificationInput = NULL;
setupTime = 0;
totalKernelTime = 0;
bestIterTimeMS = std::numeric_limits<float>::max();
iterations = 1;
sampleArgs = new HIPCommandArgs() ;
sampleTimer = new SDKTimer();
Expand Down Expand Up @@ -191,7 +193,10 @@ FastWalshTransform::runKernels(void)

hipEventCreate(&start);
hipEventCreate(&stop);
float eventMs = 1.0f;
float eventMs = 10000000.0f;

// Record the start event
hipEventRecord(start, NULL);

float *din;
hipHostGetDevicePointer((void**)&din, inputBuffer,0);
Expand All @@ -201,28 +206,24 @@ FastWalshTransform::runKernels(void)
int globalThreads = length / 2;
int localThreads = 256;

for(int step = 1; step < length; step <<= 1)
{
// Record the start event
hipEventRecord(start, NULL);
for(int step = 1; step < length; step <<= 1) {
hipLaunchKernelGGL(fastWalshTransform,
dim3(globalThreads/localThreads),
dim3(localThreads),
0, 0,
inputBuffer ,step);
}

hipLaunchKernelGGL(fastWalshTransform,
dim3(globalThreads/localThreads),
dim3(localThreads),
0, 0,
inputBuffer ,step);
hipMemcpy(output, din, length * sizeof(float), hipMemcpyDeviceToHost);

hipEventRecord(stop, NULL);
hipEventSynchronize(stop);

hipEventElapsedTime(&eventMs, start, stop);
if (eventMs < bestIterTimeMS)
bestIterTimeMS = eventMs;

// printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs);
}

hipMemcpy(output, din, length * sizeof(float), hipMemcpyDeviceToHost);

return SDK_SUCCESS;
return SDK_SUCCESS;
}

/*
Expand Down Expand Up @@ -408,16 +409,22 @@ FastWalshTransform::printStats()
{
if(sampleArgs->timing)
{
std::string strArray[3] = {"Length", "Time(sec)", "[Transfer+Kernel]Time(sec)"};
std::string stats[3];
std::string strArray[4] = {
"Length",
"Time(sec)",
"[Transfer+Kernel]Time(sec)",
"Best Iter. time (msec)",
};
std::string stats[4];

sampleTimer->totalTime = setupTime + totalKernelTime ;
sampleTimer->totalTime = setupTime + totalKernelTime;

stats[0] = toString(length, std::dec);
stats[1] = toString(sampleTimer->totalTime, std::dec);
stats[2] = toString(totalKernelTime, std::dec);
stats[3] = toString(bestIterTimeMS, std::dec);

printStatistics(strArray, stats, 3);
printStatistics(strArray, stats, 4);
}
}
int
Expand Down
Loading

0 comments on commit fc921ca

Please sign in to comment.